| |
| <!doctype html> |
| <html lang="en" class="no-js"> |
| <head> |
| |
| <meta charset="utf-8"> |
| <meta name="viewport" content="width=device-width,initial-scale=1"> |
| |
| |
| |
| <link rel="canonical" href="https://py.iceberg.apache.org/api/"> |
| |
| |
| <link rel="prev" href="../cli/"> |
| |
| |
| <link rel="next" href="../row-filter-syntax/"> |
| |
| |
| |
| |
| |
| <link rel="icon" href="../assets/images/iceberg-logo-icon.png"> |
| <meta name="generator" content="mkdocs-1.6.1, mkdocs-material-9.7.1"> |
| |
| |
| |
| <title>API - PyIceberg</title> |
| |
| |
| |
| <link rel="stylesheet" href="../assets/stylesheets/main.484c7ddc.min.css"> |
| |
| |
| <link rel="stylesheet" href="../assets/stylesheets/palette.ab4e12ef.min.css"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin> |
| <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Lato:300,300i,400,400i,700,700i%7CRoboto+Mono:400,400i,700,700i&display=fallback"> |
| <style>:root{--md-text-font:"Lato";--md-code-font:"Roboto Mono"}</style> |
| |
| |
| |
| <link rel="stylesheet" href="../assets/_mkdocstrings.css"> |
| |
| <script> |
| /* Storage helpers scoped to this site; __md_get is used by the inline palette script later on this page. */ |
| /* Site root: ".." resolved against the current page URL. */ |
| __md_scope = new URL("..", location); |
| /* Folds a string into a number, one character at a time: hash = (hash << 5) - hash + charCode. */ |
| __md_hash = text => [...text].reduce((hash, ch) => (hash << 5) - hash + ch.charCodeAt(0), 0); |
| /* Reads and JSON-parses the entry stored under "<scope pathname>.<key>". |
|    Returns null when the entry is missing, and also when storage cannot be |
|    accessed or the stored text is not valid JSON (previously those cases threw). */ |
| __md_get = (key, storage, scope = __md_scope) => { |
|   try { |
|     /* Resolve the default inside the try: merely touching localStorage can throw. */ |
|     const store = storage === undefined ? localStorage : storage; |
|     return JSON.parse(store.getItem(scope.pathname + "." + key)); |
|   } catch (err) { |
|     return null; |
|   } |
| }; |
| /* JSON-serialises a value and stores it under "<scope pathname>.<key>". |
|    Best effort: any failure while storing is deliberately ignored. */ |
| __md_set = (key, value, storage, scope = __md_scope) => { |
|   try { |
|     const store = storage === undefined ? localStorage : storage; |
|     store.setItem(scope.pathname + "." + key, JSON.stringify(value)); |
|   } catch (err) {} |
| }; |
| </script> |
| |
| |
| |
| |
| |
| |
| <!-- Matomo --> |
| <script> |
| // Command queue: reuse window._paq when it already exists, otherwise create it. |
| var _paq = window._paq = window._paq || []; |
| /* tracker methods like "setCustomDimension" should be called before "trackPageView" */ |
| [ |
|   ["setDoNotTrack", true], |
|   ["disableCookies"], |
|   ["trackPageView"], |
|   ["enableLinkTracking"] |
| ].forEach(function (command) { |
|   _paq.push(command); |
| }); |
| (function () { |
|   var baseUrl = "https://analytics.apache.org/"; |
|   _paq.push(["setTrackerUrl", baseUrl + "matomo.php"]); |
|   _paq.push(["setSiteId", "82"]); |
|   // Insert an async script element for matomo.js ahead of the first script element in the document. |
|   var loader = document.createElement("script"); |
|   var firstScript = document.getElementsByTagName("script")[0]; |
|   loader.async = true; |
|   loader.src = baseUrl + "matomo.js"; |
|   firstScript.parentNode.insertBefore(loader, firstScript); |
| })(); |
| </script> |
| <!-- End Matomo --> |
| |
| </head> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <body dir="ltr" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo"> |
| |
| |
| <input class="md-toggle" data-md-toggle="drawer" type="checkbox" id="__drawer" autocomplete="off"> |
| <input class="md-toggle" data-md-toggle="search" type="checkbox" id="__search" autocomplete="off"> |
| <label class="md-overlay" for="__drawer"></label> |
| <div data-md-component="skip"> |
| |
| |
| <a href="#python-api" class="md-skip"> |
| Skip to content |
| </a> |
| |
| </div> |
| <div data-md-component="announce"> |
| |
| </div> |
| |
| |
| |
| |
| |
| |
| <header class="md-header md-header--shadow md-header--lifted" data-md-component="header"> |
| <nav class="md-header__inner md-grid" aria-label="Header"> |
| <!-- Home link. Its accessible name comes from aria-label, so the image is marked decorative with an empty alt. --> |
| <a href=".." title="PyIceberg" class="md-header__button md-logo" aria-label="PyIceberg" data-md-component="logo"> |
| |
| <img src="../assets/images/iceberg-logo-icon.png" alt=""> |
| |
| </a> |
| <label class="md-header__button md-icon" for="__drawer"> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M3 6h18v2H3zm0 5h18v2H3zm0 5h18v2H3z"/></svg> |
| </label> |
| <div class="md-header__title" data-md-component="header-title"> |
| <div class="md-header__ellipsis"> |
| <div class="md-header__topic"> |
| <span class="md-ellipsis"> |
| PyIceberg |
| </span> |
| </div> |
| <div class="md-header__topic" data-md-component="header-topic"> |
| <span class="md-ellipsis"> |
| |
| API |
| |
| </span> |
| </div> |
| </div> |
| </div> |
| |
| |
| <form class="md-header__option" data-md-component="palette"> |
| |
| |
| |
| |
| <input class="md-option" data-md-color-media="" data-md-color-scheme="default" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to dark mode" type="radio" name="__palette" id="__palette_0"> |
| |
| <label class="md-header__button md-icon" title="Switch to dark mode" for="__palette_1" hidden> |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 8a4 4 0 0 0-4 4 4 4 0 0 0 4 4 4 4 0 0 0 4-4 4 4 0 0 0-4-4m0 10a6 6 0 0 1-6-6 6 6 0 0 1 6-6 6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg> |
| </label> |
| |
| |
| |
| |
| |
| <input class="md-option" data-md-color-media="" data-md-color-scheme="slate" data-md-color-primary="indigo" data-md-color-accent="indigo" aria-label="Switch to light mode" type="radio" name="__palette" id="__palette_1"> |
| |
| <label class="md-header__button md-icon" title="Switch to light mode" for="__palette_0" hidden> |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M12 18c-.89 0-1.74-.2-2.5-.55C11.56 16.5 13 14.42 13 12s-1.44-4.5-3.5-5.45C10.26 6.2 11.11 6 12 6a6 6 0 0 1 6 6 6 6 0 0 1-6 6m8-9.31V4h-4.69L12 .69 8.69 4H4v4.69L.69 12 4 15.31V20h4.69L12 23.31 15.31 20H20v-4.69L23.31 12z"/></svg> |
| </label> |
| |
| |
| </form> |
| |
| |
| |
| <script> |
| /* Applies the palette stored under "__palette" (if any) to <body> as data-md-color-* attributes. */ |
| var palette = __md_get("__palette"); |
| if (palette && palette.color) { |
|   var input = null; |
|   if ("(prefers-color-scheme)" === palette.color.media) { |
|     /* The stored choice follows the system preference: take the concrete colours from |
|        the palette option whose media query matches the current light/dark setting. */ |
|     var media = matchMedia("(prefers-color-scheme: light)"); |
|     input = document.querySelector(media.matches ? "[data-md-color-media='(prefers-color-scheme: light)']" : "[data-md-color-media='(prefers-color-scheme: dark)']"); |
|     if (input) { |
|       palette.color.media = input.getAttribute("data-md-color-media"); |
|       palette.color.scheme = input.getAttribute("data-md-color-scheme"); |
|       palette.color.primary = input.getAttribute("data-md-color-primary"); |
|       palette.color.accent = input.getAttribute("data-md-color-accent"); |
|     } |
|   } |
|   /* When no option matches, leave the attributes already on <body> untouched |
|      (previously this case threw a TypeError on the null query result). */ |
|   if (input || "(prefers-color-scheme)" !== palette.color.media) { |
|     for (var [key, value] of Object.entries(palette.color)) { |
|       document.body.setAttribute("data-md-color-" + key, value); |
|     } |
|   } |
| } |
| </script> |
| |
| |
| |
| |
| |
| <label class="md-header__button md-icon" for="__search"> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg> |
| </label> |
| <!-- Search dialog. role="dialog" needs an accessible name, supplied here through aria-label. --> |
| <div class="md-search" data-md-component="search" role="dialog" aria-label="Search"> |
| <label class="md-search__overlay" for="__search"></label> |
| <div class="md-search__inner" role="search"> |
| <form class="md-search__form" name="search"> |
| <input type="text" class="md-search__input" name="query" aria-label="Search" placeholder="Search" autocapitalize="off" autocorrect="off" autocomplete="off" spellcheck="false" data-md-component="search-query" required> |
| <label class="md-search__icon md-icon" for="__search"> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M9.5 3A6.5 6.5 0 0 1 16 9.5c0 1.61-.59 3.09-1.56 4.23l.27.27h.79l5 5-1.5 1.5-5-5v-.79l-.27-.27A6.52 6.52 0 0 1 9.5 16 6.5 6.5 0 0 1 3 9.5 6.5 6.5 0 0 1 9.5 3m0 2C7 5 5 7 5 9.5S7 14 9.5 14 14 12 14 9.5 12 5 9.5 5"/></svg> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M20 11v2H8l5.5 5.5-1.42 1.42L4.16 12l7.92-7.92L13.5 5.5 8 11z"/></svg> |
| </label> |
| <nav class="md-search__options" aria-label="Search"> |
| |
| <button type="reset" class="md-search__icon md-icon" title="Clear" aria-label="Clear" tabindex="-1"> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M19 6.41 17.59 5 12 10.59 6.41 5 5 6.41 10.59 12 5 17.59 6.41 19 12 13.41 17.59 19 19 17.59 13.41 12z"/></svg> |
| </button> |
| </nav> |
| |
| </form> |
| <div class="md-search__output"> |
| <div class="md-search__scrollwrap" tabindex="0" data-md-scrollfix> |
| <div class="md-search-result" data-md-component="search-result"> |
| <div class="md-search-result__meta"> |
| Initializing search |
| </div> |
| <ol class="md-search-result__list" role="presentation"></ol> |
| </div> |
| </div> |
| </div> |
| </div> |
| </div> |
| |
| |
| |
| <div class="md-header__source"> |
| <a href="https://github.com/apache/iceberg-python" title="Go to repository" class="md-source" data-md-component="source"> |
| <div class="md-source__icon md-icon"> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg> |
| </div> |
| <div class="md-source__repository"> |
| apache/iceberg-python |
| </div> |
| </a> |
| </div> |
| |
| </nav> |
| |
| |
| |
| <nav class="md-tabs" aria-label="Tabs" data-md-component="tabs"> |
| <div class="md-grid"> |
| <ul class="md-tabs__list"> |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item"> |
| <a href=".." class="md-tabs__link"> |
| |
| |
| |
| |
| |
| Getting started |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item"> |
| <a href="../configuration/" class="md-tabs__link"> |
| |
| |
| |
| |
| |
| Configuration |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item"> |
| <a href="../cli/" class="md-tabs__link"> |
| |
| |
| |
| |
| |
| CLI |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <!-- Active tab: aria-current exposes the current page to assistive technology, not only through the CSS class. --> |
| <li class="md-tabs__item md-tabs__item--active"> |
| <a href="./" class="md-tabs__link" aria-current="page"> |
| |
| |
| |
| |
| |
| API |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item"> |
| <a href="../contributing/" class="md-tabs__link"> |
| |
| |
| |
| |
| |
| Contributing |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item"> |
| <a href="../community/" class="md-tabs__link"> |
| |
| |
| |
| |
| |
| Community |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item"> |
| <a href="../verify-release/" class="md-tabs__link"> |
| |
| |
| |
| Releases |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-tabs__item"> |
| <a href="../reference/pyiceberg/" class="md-tabs__link"> |
| |
| |
| |
| Code Reference |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </div> |
| </nav> |
| |
| |
| </header> |
| |
| <div class="md-container" data-md-component="container"> |
| |
| |
| |
| |
| <main class="md-main" data-md-component="main"> |
| <div class="md-main__inner md-grid"> |
| |
| |
| |
| |
| |
| <div class="md-sidebar md-sidebar--primary" data-md-component="sidebar" data-md-type="navigation" hidden> |
| <div class="md-sidebar__scrollwrap"> |
| <div class="md-sidebar__inner"> |
| |
| |
| |
| |
| |
| |
| <nav class="md-nav md-nav--primary md-nav--lifted" aria-label="Navigation" data-md-level="0"> |
| <label class="md-nav__title" for="__drawer"> |
| <!-- Home link. Its accessible name comes from aria-label, so the image is marked decorative with an empty alt. --> |
| <a href=".." title="PyIceberg" class="md-nav__button md-logo" aria-label="PyIceberg" data-md-component="logo"> |
| |
| <img src="../assets/images/iceberg-logo-icon.png" alt=""> |
| |
| </a> |
| PyIceberg |
| </label> |
| |
| <div class="md-nav__source"> |
| <a href="https://github.com/apache/iceberg-python" title="Go to repository" class="md-source" data-md-component="source"> |
| <div class="md-source__icon md-icon"> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 448 512"><!--! Font Awesome Free 7.1.0 by @fontawesome - https://fontawesome.com License - https://fontawesome.com/license/free (Icons: CC BY 4.0, Fonts: SIL OFL 1.1, Code: MIT License) Copyright 2025 Fonticons, Inc.--><path d="M439.6 236.1 244 40.5c-5.4-5.5-12.8-8.5-20.4-8.5s-15 3-20.4 8.4L162.5 81l51.5 51.5c27.1-9.1 52.7 16.8 43.4 43.7l49.7 49.7c34.2-11.8 61.2 31 35.5 56.7-26.5 26.5-70.2-2.9-56-37.3L240.3 199v121.9c25.3 12.5 22.3 41.8 9.1 55-6.4 6.4-15.2 10.1-24.3 10.1s-17.8-3.6-24.3-10.1c-17.6-17.6-11.1-46.9 11.2-56v-123c-20.8-8.5-24.6-30.7-18.6-45L142.6 101 8.5 235.1C3 240.6 0 247.9 0 255.5s3 15 8.5 20.4l195.6 195.7c5.4 5.4 12.7 8.4 20.4 8.4s15-3 20.4-8.4l194.7-194.7c5.4-5.4 8.4-12.8 8.4-20.4s-3-15-8.4-20.4"/></svg> |
| </div> |
| <div class="md-source__repository"> |
| apache/iceberg-python |
| </div> |
| </a> |
| </div> |
| |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href=".." class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| Getting started |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../configuration/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| Configuration |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../cli/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| CLI |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--active md-nav__item--section md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_4" checked> |
| |
| |
| <!-- Active section entry: a link to the current page plus the label that toggles the __nav_4 checkbox. --> |
| <div class="md-nav__link md-nav__container"> |
| <a href="./" class="md-nav__link md-nav__link--active" aria-current="page"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| API |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| |
| |
| <!-- The empty tabindex="" was not a valid integer and has been removed; the label stays out of the tab order as before. --> |
| <label class="md-nav__link md-nav__link--active" for="__nav_4" id="__nav_4_label"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_4_label" aria-expanded="true"> |
| <label class="md-nav__title" for="__nav_4"> |
| <span class="md-nav__icon md-icon"></span> |
| |
| |
| API |
| |
| |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../row-filter-syntax/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| Row Filter Syntax |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../expression-dsl/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| Expression DSL |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../contributing/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| Contributing |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../community/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| Community |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_7" > |
| |
| |
| <label class="md-nav__link" for="__nav_7" id="__nav_7_label" tabindex="0"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| Releases |
| |
| |
| |
| </span> |
| |
| |
| |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_7_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_7"> |
| <span class="md-nav__icon md-icon"></span> |
| |
| |
| Releases |
| |
| |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../verify-release/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| Verify a release |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../how-to-release/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| How to release |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="https://github.com/apache/iceberg-python/releases" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| Release Notes |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../nightly-build/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| Nightly Build |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8" > |
| |
| |
| <label class="md-nav__link" for="__nav_8" id="__nav_8_label" tabindex="0"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| Code Reference |
| |
| |
| |
| </span> |
| |
| |
| |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| <nav class="md-nav" data-md-level="1" aria-labelledby="__nav_8_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8"> |
| <span class="md-nav__icon md-icon"></span> |
| |
| |
| Code Reference |
| |
| |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1" > |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../reference/pyiceberg/" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| pyiceberg |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1" id="__nav_8_1_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="2" aria-labelledby="__nav_8_1_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1"> |
| <span class="md-nav__icon md-icon"></span> |
| |
| |
| pyiceberg |
| |
| |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_1" > |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../reference/pyiceberg/avro/" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| avro |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_1" id="__nav_8_1_1_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_1_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_1"> |
| <span class="md-nav__icon md-icon"></span> |
| |
| |
| avro |
| |
| |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_1_1" > |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../reference/pyiceberg/avro/codecs/" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| codecs |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_1_1" id="__nav_8_1_1_1_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="4" aria-labelledby="__nav_8_1_1_1_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_1_1"> |
| <span class="md-nav__icon md-icon"></span> |
| |
| |
| codecs |
| |
| |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/avro/codecs/bzip2/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| bzip2 |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/avro/codecs/codec/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| codec |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/avro/codecs/deflate/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| deflate |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/avro/codecs/snappy_codec/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| snappy_codec |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/avro/codecs/zstandard_codec/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| zstandard_codec |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/avro/decoder/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| decoder |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/avro/encoder/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| encoder |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/avro/file/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| file |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/avro/reader/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| reader |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/avro/resolver/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| resolver |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/avro/writer/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| writer |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_2" > |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../reference/pyiceberg/catalog/" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| catalog |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_2" id="__nav_8_1_2_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_2_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_2"> |
| <span class="md-nav__icon md-icon"></span> |
| |
| |
| catalog |
| |
| |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/catalog/bigquery_metastore/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| bigquery_metastore |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/catalog/dynamodb/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| dynamodb |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/catalog/glue/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| glue |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/catalog/hive/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| hive |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/catalog/memory/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| memory |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/catalog/noop/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| noop |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_2_7" > |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../reference/pyiceberg/catalog/rest/" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| rest |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_2_7" id="__nav_8_1_2_7_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="4" aria-labelledby="__nav_8_1_2_7_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_2_7"> |
| <span class="md-nav__icon md-icon"></span> |
| |
| |
| rest |
| |
| |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/catalog/rest/auth/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| auth |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/catalog/rest/response/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| response |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/catalog/rest/scan_planning/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| scan_planning |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/catalog/sql/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| sql |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_3" > |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../reference/pyiceberg/cli/" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| cli |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_3" id="__nav_8_1_3_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_3_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_3"> |
| <span class="md-nav__icon md-icon"></span> |
| |
| |
| cli |
| |
| |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/cli/console/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| console |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/cli/output/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| output |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/conversions/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| conversions |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/exceptions/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| exceptions |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_6" > |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../reference/pyiceberg/expressions/" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| expressions |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_6" id="__nav_8_1_6_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_6_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_6"> |
| <span class="md-nav__icon md-icon"></span> |
| |
| |
| expressions |
| |
| |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/expressions/literals/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| literals |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/expressions/parser/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| parser |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/expressions/visitors/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| visitors |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_7" > |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../reference/pyiceberg/io/" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| io |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_7" id="__nav_8_1_7_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_7_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_7"> |
| <span class="md-nav__icon md-icon"></span> |
| |
| |
| io |
| |
| |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/io/fsspec/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| fsspec |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/io/pyarrow/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| pyarrow |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/manifest/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| manifest |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/partitioning/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| partitioning |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/schema/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| schema |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/serializers/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| serializers |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_12" > |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../reference/pyiceberg/table/" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| table |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_12" id="__nav_8_1_12_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_12_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_12"> |
| <span class="md-nav__icon md-icon"></span> |
| |
| |
| table |
| |
| |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/table/delete_file_index/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| delete_file_index |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/table/inspect/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| inspect |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/table/locations/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| locations |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/table/maintenance/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| maintenance |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/table/metadata/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| metadata |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/table/name_mapping/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| name_mapping |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/table/puffin/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| puffin |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/table/refs/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| refs |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/table/snapshots/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| snapshots |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/table/sorting/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| sorting |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/table/statistics/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| statistics |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_12_12" > |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../reference/pyiceberg/table/update/" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| update |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_12_12" id="__nav_8_1_12_12_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="4" aria-labelledby="__nav_8_1_12_12_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_12_12"> |
| <span class="md-nav__icon md-icon"></span> |
| |
| |
| update |
| |
| |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/table/update/schema/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| schema |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/table/update/snapshot/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| snapshot |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/table/update/sorting/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| sorting |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/table/update/spec/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| spec |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/table/update/statistics/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| statistics |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/table/update/validate/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| validate |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/table/upsert_util/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| upsert_util |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/transforms/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| transforms |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/typedef/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| typedef |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/types/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| types |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item md-nav__item--nested"> |
| |
| |
| |
| <input class="md-nav__toggle md-toggle " type="checkbox" id="__nav_8_1_16" > |
| |
| |
| <div class="md-nav__link md-nav__container"> |
| <a href="../reference/pyiceberg/utils/" class="md-nav__link "> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| utils |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| |
| |
| <label class="md-nav__link " for="__nav_8_1_16" id="__nav_8_1_16_label" tabindex="0"> |
| <span class="md-nav__icon md-icon"></span> |
| </label> |
| |
| </div> |
| |
| <nav class="md-nav" data-md-level="3" aria-labelledby="__nav_8_1_16_label" aria-expanded="false"> |
| <label class="md-nav__title" for="__nav_8_1_16"> |
| <span class="md-nav__icon md-icon"></span> |
| |
| |
| utils |
| |
| |
| </label> |
| <ul class="md-nav__list" data-md-scrollfix> |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/utils/bin_packing/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| bin_packing |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/utils/concurrent/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| concurrent |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/utils/config/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| config |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/utils/datetime/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| datetime |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/utils/decimal/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| decimal |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/utils/deprecated/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| deprecated |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/utils/lazydict/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| lazydict |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/utils/parsing/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| parsing |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/utils/properties/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| properties |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/utils/schema_conversion/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| schema_conversion |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/utils/singleton/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| singleton |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <li class="md-nav__item"> |
| <a href="../reference/pyiceberg/utils/truncate/" class="md-nav__link"> |
| |
| |
| |
| <span class="md-ellipsis"> |
| |
| |
| truncate |
| |
| |
| |
| </span> |
| |
| |
| |
| </a> |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| |
| |
| </ul> |
| </nav> |
| </div> |
| </div> |
| </div> |
| |
| |
| |
| |
| |
| <div class="md-sidebar md-sidebar--secondary" data-md-component="sidebar" data-md-type="toc" > |
| <div class="md-sidebar__scrollwrap"> |
| <div class="md-sidebar__inner"> |
| |
| |
| <nav class="md-nav md-nav--secondary" aria-label="Table of contents"> |
| |
| |
| |
| |
| |
| |
| <label class="md-nav__title" for="__toc"> |
| <span class="md-nav__icon md-icon"></span> |
| Table of contents |
| </label> |
| <ul class="md-nav__list" data-md-component="toc" data-md-scrollfix> |
| |
| <li class="md-nav__item"> |
| <a href="#create-a-table" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Create a table |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#register-a-table" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Register a table |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#load-a-table" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Load a table |
| |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="Load a table"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#catalog-table" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Catalog table |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#static-table" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Static table |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#check-if-a-table-exists" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Check if a table exists |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#rename-a-table" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Rename a table |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#drop-a-table" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Drop a table |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#write-to-a-table" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Write to a table |
| |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="Write to a table"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#partial-overwrites" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Partial overwrites |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#upsert" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Upsert |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#inspecting-tables" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Inspecting tables |
| |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="Inspecting tables"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#snapshots" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Snapshots |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#partitions" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Partitions |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#entries" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Entries |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#references" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| References |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#manifests" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Manifests |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#metadata-log-entries" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Metadata Log Entries |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#history" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| History |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#files" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Files |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#add-files" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Add Files |
| |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="Add Files"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#usage" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Usage |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#example" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Example |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#schema-evolution" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Schema evolution |
| |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="Schema evolution"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#union-by-name" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Union by Name |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#add-column" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Add column |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#rename-column" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Rename column |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#move-column" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Move column |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#update-column" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Update column |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#delete-column" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Delete column |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#partition-evolution" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Partition evolution |
| |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="Partition evolution"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#add-fields" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Add fields |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#remove-fields" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Remove fields |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#rename-fields" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Rename fields |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#sort-order-updates" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Sort order updates |
| |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="Sort order updates"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#updating-a-sort-order-on-a-table" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Updating a sort order on a table |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#table-properties" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Table properties |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#snapshot-properties" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Snapshot properties |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#snapshot-management" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Snapshot Management |
| |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="Snapshot Management"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#tags" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Tags |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#branching" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Branching |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#table-maintenance" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Table Maintenance |
| |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="Table Maintenance"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#snapshot-expiration" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Snapshot Expiration |
| |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="Snapshot Expiration"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#real-world-example" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Real-world Example |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#views" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Views |
| |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="Views"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#check-if-a-view-exists" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Check if a view exists |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#table-statistics-management" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Table Statistics Management |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#query-the-data" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Query the data |
| |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="Query the data"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#apache-arrow" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Apache Arrow |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#pandas" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Pandas |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#duckdb" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| DuckDB |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#ray" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Ray |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#bodo" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Bodo |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#daft" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Daft |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#polars" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Polars |
| |
| </span> |
| </a> |
| |
| <nav class="md-nav" aria-label="Polars"> |
| <ul class="md-nav__list"> |
| |
| <li class="md-nav__item"> |
| <a href="#working-with-polars-dataframe" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Working with Polars DataFrame |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#working-with-polars-lazyframe" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Working with Polars LazyFrame |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| <li class="md-nav__item"> |
| <a href="#apache-datafusion" class="md-nav__link"> |
| <span class="md-ellipsis"> |
| |
| Apache DataFusion |
| |
| </span> |
| </a> |
| |
| </li> |
| |
| </ul> |
| </nav> |
| |
| </li> |
| |
| </ul> |
| |
| </nav> |
| </div> |
| </div> |
| </div> |
| |
| |
| |
| <div class="md-content" data-md-component="content"> |
| |
| <article class="md-content__inner md-typeset"> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| <!-- |
| - Licensed to the Apache Software Foundation (ASF) under one |
| - or more contributor license agreements. See the NOTICE file |
| - distributed with this work for additional information |
| - regarding copyright ownership. The ASF licenses this file |
| - to you under the Apache License, Version 2.0 (the |
| - "License"); you may not use this file except in compliance |
| - with the License. You may obtain a copy of the License at |
| - |
| - http://www.apache.org/licenses/LICENSE-2.0 |
| - |
| - Unless required by applicable law or agreed to in writing, |
| - software distributed under the License is distributed on an |
| - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| - KIND, either express or implied. See the License for the |
| - specific language governing permissions and limitations |
| - under the License. |
| --> |
| |
| <h1 id="python-api">Python API<a class="headerlink" href="#python-api" title="Permanent link">¶</a></h1> |
| <p>(Py)Iceberg is <a href="https://iceberg.apache.org/terms/#catalog">catalog</a>-centric, meaning that reading and writing data goes through a catalog. The first step is to instantiate a catalog to load a table. Let's use the following configuration in <code>.pyiceberg.yaml</code> to define a REST catalog called <code>prod</code>:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-0-1" name="__codelineno-0-1" href="#__codelineno-0-1"></a><span class="nt">catalog</span><span class="p">:</span> |
| <a id="__codelineno-0-2" name="__codelineno-0-2" href="#__codelineno-0-2"></a><span class="w">  </span><span class="nt">prod</span><span class="p">:</span> |
| <a id="__codelineno-0-3" name="__codelineno-0-3" href="#__codelineno-0-3"></a><span class="w">    </span><span class="nt">uri</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">http://rest-catalog/ws/</span> |
| <a id="__codelineno-0-4" name="__codelineno-0-4" href="#__codelineno-0-4"></a><span class="w">    </span><span class="nt">credential</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">t-1234:secret</span> |
| </code></pre></div> |
| <p>Note that multiple catalogs can be defined in the same <code>.pyiceberg.yaml</code>, for example, in the case of a Hive and REST catalog:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-1-1" name="__codelineno-1-1" href="#__codelineno-1-1"></a><span class="nt">catalog</span><span class="p">:</span> |
| <a id="__codelineno-1-2" name="__codelineno-1-2" href="#__codelineno-1-2"></a><span class="w">  </span><span class="nt">hive</span><span class="p">:</span> |
| <a id="__codelineno-1-3" name="__codelineno-1-3" href="#__codelineno-1-3"></a><span class="w">    </span><span class="nt">uri</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">thrift://127.0.0.1:9083</span> |
| <a id="__codelineno-1-4" name="__codelineno-1-4" href="#__codelineno-1-4"></a><span class="w">    </span><span class="nt">s3.endpoint</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">http://127.0.0.1:9000</span> |
| <a id="__codelineno-1-5" name="__codelineno-1-5" href="#__codelineno-1-5"></a><span class="w">    </span><span class="nt">s3.access-key-id</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">admin</span> |
| <a id="__codelineno-1-6" name="__codelineno-1-6" href="#__codelineno-1-6"></a><span class="w">    </span><span class="nt">s3.secret-access-key</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">password</span> |
| <a id="__codelineno-1-7" name="__codelineno-1-7" href="#__codelineno-1-7"></a><span class="w">  </span><span class="nt">rest</span><span class="p">:</span> |
| <a id="__codelineno-1-8" name="__codelineno-1-8" href="#__codelineno-1-8"></a><span class="w">    </span><span class="nt">uri</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">https://rest-server:8181/</span> |
| <a id="__codelineno-1-9" name="__codelineno-1-9" href="#__codelineno-1-9"></a><span class="w">    </span><span class="nt">warehouse</span><span class="p">:</span><span class="w"> </span><span class="l l-Scalar l-Scalar-Plain">my-warehouse</span> |
| </code></pre></div> |
| <p>The different catalogs can be loaded in PyIceberg by their name: <code>load_catalog(name="hive")</code> and <code>load_catalog(name="rest")</code>. An overview of the configuration options can be found on the <a href="https://py.iceberg.apache.org/configuration/">configuration page</a>.</p> |
| <p>This information must be placed inside a file called <code>.pyiceberg.yaml</code> located either in the <code>$HOME</code> or <code>%USERPROFILE%</code> directory (depending on whether the operating system is Unix-based or Windows-based, respectively), in the current working directory, or in the <code>$PYICEBERG_HOME</code> directory (if the corresponding environment variable is set).</p> |
| <p>It is also possible to load a catalog without using a <code>.pyiceberg.yaml</code> by passing in the properties directly:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-2-1" name="__codelineno-2-1" href="#__codelineno-2-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.catalog</span><span class="w"> </span><span class="kn">import</span> <span class="n">load_catalog</span> |
| <a id="__codelineno-2-2" name="__codelineno-2-2" href="#__codelineno-2-2"></a> |
| <a id="__codelineno-2-3" name="__codelineno-2-3" href="#__codelineno-2-3"></a><span class="n">catalog</span> <span class="o">=</span> <span class="n">load_catalog</span><span class="p">(</span> |
| <a id="__codelineno-2-4" name="__codelineno-2-4" href="#__codelineno-2-4"></a> <span class="s2">"docs"</span><span class="p">,</span> |
| <a id="__codelineno-2-5" name="__codelineno-2-5" href="#__codelineno-2-5"></a> <span class="o">**</span><span class="p">{</span> |
| <a id="__codelineno-2-6" name="__codelineno-2-6" href="#__codelineno-2-6"></a> <span class="s2">"uri"</span><span class="p">:</span> <span class="s2">"http://127.0.0.1:8181"</span><span class="p">,</span> |
| <a id="__codelineno-2-7" name="__codelineno-2-7" href="#__codelineno-2-7"></a> <span class="s2">"s3.endpoint"</span><span class="p">:</span> <span class="s2">"http://127.0.0.1:9000"</span><span class="p">,</span> |
| <a id="__codelineno-2-8" name="__codelineno-2-8" href="#__codelineno-2-8"></a> <span class="s2">"py-io-impl"</span><span class="p">:</span> <span class="s2">"pyiceberg.io.pyarrow.PyArrowFileIO"</span><span class="p">,</span> |
| <a id="__codelineno-2-9" name="__codelineno-2-9" href="#__codelineno-2-9"></a> <span class="s2">"s3.access-key-id"</span><span class="p">:</span> <span class="s2">"admin"</span><span class="p">,</span> |
| <a id="__codelineno-2-10" name="__codelineno-2-10" href="#__codelineno-2-10"></a> <span class="s2">"s3.secret-access-key"</span><span class="p">:</span> <span class="s2">"password"</span><span class="p">,</span> |
| <a id="__codelineno-2-11" name="__codelineno-2-11" href="#__codelineno-2-11"></a> <span class="p">}</span> |
| <a id="__codelineno-2-12" name="__codelineno-2-12" href="#__codelineno-2-12"></a><span class="p">)</span> |
| </code></pre></div> |
| <p>Next, create a namespace:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-3-1" name="__codelineno-3-1" href="#__codelineno-3-1"></a><span class="n">catalog</span><span class="o">.</span><span class="n">create_namespace</span><span class="p">(</span><span class="s2">"docs_example"</span><span class="p">)</span> |
| </code></pre></div> |
| <p>Or, list existing namespaces:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-4-1" name="__codelineno-4-1" href="#__codelineno-4-1"></a><span class="n">ns</span> <span class="o">=</span> <span class="n">catalog</span><span class="o">.</span><span class="n">list_namespaces</span><span class="p">()</span> |
| <a id="__codelineno-4-2" name="__codelineno-4-2" href="#__codelineno-4-2"></a> |
| <a id="__codelineno-4-3" name="__codelineno-4-3" href="#__codelineno-4-3"></a><span class="k">assert</span> <span class="n">ns</span> <span class="o">==</span> <span class="p">[(</span><span class="s2">"docs_example"</span><span class="p">,)]</span> |
| </code></pre></div> |
| <p>Next, update the namespace properties.</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-5-1" name="__codelineno-5-1" href="#__codelineno-5-1"></a><span class="c1"># Load namespace properties</span> |
| <a id="__codelineno-5-2" name="__codelineno-5-2" href="#__codelineno-5-2"></a><span class="n">properties</span> <span class="o">=</span> <span class="n">catalog</span><span class="o">.</span><span class="n">load_namespace_properties</span><span class="p">(</span><span class="s2">"docs_example"</span><span class="p">)</span> |
| <a id="__codelineno-5-3" name="__codelineno-5-3" href="#__codelineno-5-3"></a> |
| <a id="__codelineno-5-4" name="__codelineno-5-4" href="#__codelineno-5-4"></a><span class="c1"># Update namespace properties with additions and removals.</span> |
| <a id="__codelineno-5-5" name="__codelineno-5-5" href="#__codelineno-5-5"></a><span class="n">catalog</span><span class="o">.</span><span class="n">update_namespace_properties</span><span class="p">(</span><span class="s2">"docs_example"</span><span class="p">,</span> <span class="n">removals</span><span class="o">=</span><span class="p">{</span><span class="s2">"remove-meee!"</span><span class="p">},</span> <span class="n">updates</span><span class="o">=</span><span class="p">{</span><span class="s2">"owner"</span><span class="p">:</span> <span class="s2">"iceberg"</span><span class="p">})</span> |
| </code></pre></div> |
| <p>Finally, drop the namespace (if you want!):</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-6-1" name="__codelineno-6-1" href="#__codelineno-6-1"></a><span class="c1"># Drop a namespace</span> |
| <a id="__codelineno-6-2" name="__codelineno-6-2" href="#__codelineno-6-2"></a><span class="n">catalog</span><span class="o">.</span><span class="n">drop_namespace</span><span class="p">(</span><span class="s2">"docs_example"</span><span class="p">)</span> |
| </code></pre></div> |
| <h2 id="create-a-table">Create a table<a class="headerlink" href="#create-a-table" title="Permanent link">¶</a></h2> |
| <p>To create a table from a catalog:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-7-1" name="__codelineno-7-1" href="#__codelineno-7-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.schema</span><span class="w"> </span><span class="kn">import</span> <span class="n">Schema</span> |
| <a id="__codelineno-7-2" name="__codelineno-7-2" href="#__codelineno-7-2"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.types</span><span class="w"> </span><span class="kn">import</span> <span class="p">(</span> |
| <a id="__codelineno-7-3" name="__codelineno-7-3" href="#__codelineno-7-3"></a> <span class="n">TimestampType</span><span class="p">,</span> |
| <a id="__codelineno-7-4" name="__codelineno-7-4" href="#__codelineno-7-4"></a> <span class="n">FloatType</span><span class="p">,</span> |
| <a id="__codelineno-7-5" name="__codelineno-7-5" href="#__codelineno-7-5"></a> <span class="n">DoubleType</span><span class="p">,</span> |
| <a id="__codelineno-7-6" name="__codelineno-7-6" href="#__codelineno-7-6"></a> <span class="n">StringType</span><span class="p">,</span> |
| <a id="__codelineno-7-7" name="__codelineno-7-7" href="#__codelineno-7-7"></a> <span class="n">NestedField</span><span class="p">,</span> |
| <a id="__codelineno-7-8" name="__codelineno-7-8" href="#__codelineno-7-8"></a> <span class="n">StructType</span><span class="p">,</span> |
| <a id="__codelineno-7-9" name="__codelineno-7-9" href="#__codelineno-7-9"></a><span class="p">)</span> |
| <a id="__codelineno-7-10" name="__codelineno-7-10" href="#__codelineno-7-10"></a> |
| <a id="__codelineno-7-11" name="__codelineno-7-11" href="#__codelineno-7-11"></a><span class="n">schema</span> <span class="o">=</span> <span class="n">Schema</span><span class="p">(</span> |
| <a id="__codelineno-7-12" name="__codelineno-7-12" href="#__codelineno-7-12"></a> <span class="n">NestedField</span><span class="p">(</span><span class="n">field_id</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s2">"datetime"</span><span class="p">,</span> <span class="n">field_type</span><span class="o">=</span><span class="n">TimestampType</span><span class="p">(),</span> <span class="n">required</span><span class="o">=</span><span class="kc">True</span><span class="p">),</span> |
| <a id="__codelineno-7-13" name="__codelineno-7-13" href="#__codelineno-7-13"></a> <span class="n">NestedField</span><span class="p">(</span><span class="n">field_id</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s2">"symbol"</span><span class="p">,</span> <span class="n">field_type</span><span class="o">=</span><span class="n">StringType</span><span class="p">(),</span> <span class="n">required</span><span class="o">=</span><span class="kc">True</span><span class="p">),</span> |
| <a id="__codelineno-7-14" name="__codelineno-7-14" href="#__codelineno-7-14"></a> <span class="n">NestedField</span><span class="p">(</span><span class="n">field_id</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s2">"bid"</span><span class="p">,</span> <span class="n">field_type</span><span class="o">=</span><span class="n">FloatType</span><span class="p">(),</span> <span class="n">required</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span> |
| <a id="__codelineno-7-15" name="__codelineno-7-15" href="#__codelineno-7-15"></a> <span class="n">NestedField</span><span class="p">(</span><span class="n">field_id</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s2">"ask"</span><span class="p">,</span> <span class="n">field_type</span><span class="o">=</span><span class="n">DoubleType</span><span class="p">(),</span> <span class="n">required</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span> |
| <a id="__codelineno-7-16" name="__codelineno-7-16" href="#__codelineno-7-16"></a> <span class="n">NestedField</span><span class="p">(</span> |
| <a id="__codelineno-7-17" name="__codelineno-7-17" href="#__codelineno-7-17"></a> <span class="n">field_id</span><span class="o">=</span><span class="mi">5</span><span class="p">,</span> |
| <a id="__codelineno-7-18" name="__codelineno-7-18" href="#__codelineno-7-18"></a> <span class="n">name</span><span class="o">=</span><span class="s2">"details"</span><span class="p">,</span> |
| <a id="__codelineno-7-19" name="__codelineno-7-19" href="#__codelineno-7-19"></a> <span class="n">field_type</span><span class="o">=</span><span class="n">StructType</span><span class="p">(</span> |
| <a id="__codelineno-7-20" name="__codelineno-7-20" href="#__codelineno-7-20"></a> <span class="n">NestedField</span><span class="p">(</span> |
| <a id="__codelineno-7-21" name="__codelineno-7-21" href="#__codelineno-7-21"></a> <span class="n">field_id</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s2">"created_by"</span><span class="p">,</span> <span class="n">field_type</span><span class="o">=</span><span class="n">StringType</span><span class="p">(),</span> <span class="n">required</span><span class="o">=</span><span class="kc">False</span> |
| <a id="__codelineno-7-22" name="__codelineno-7-22" href="#__codelineno-7-22"></a> <span class="p">),</span> |
| <a id="__codelineno-7-23" name="__codelineno-7-23" href="#__codelineno-7-23"></a> <span class="p">),</span> |
| <a id="__codelineno-7-24" name="__codelineno-7-24" href="#__codelineno-7-24"></a> <span class="n">required</span><span class="o">=</span><span class="kc">False</span><span class="p">,</span> |
| <a id="__codelineno-7-25" name="__codelineno-7-25" href="#__codelineno-7-25"></a> <span class="p">),</span> |
| <a id="__codelineno-7-26" name="__codelineno-7-26" href="#__codelineno-7-26"></a><span class="p">)</span> |
| <a id="__codelineno-7-27" name="__codelineno-7-27" href="#__codelineno-7-27"></a> |
| <a id="__codelineno-7-28" name="__codelineno-7-28" href="#__codelineno-7-28"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.partitioning</span><span class="w"> </span><span class="kn">import</span> <span class="n">PartitionSpec</span><span class="p">,</span> <span class="n">PartitionField</span> |
| <a id="__codelineno-7-29" name="__codelineno-7-29" href="#__codelineno-7-29"></a> |
| <a id="__codelineno-7-30" name="__codelineno-7-30" href="#__codelineno-7-30"></a><span class="n">partition_spec</span> <span class="o">=</span> <span class="n">PartitionSpec</span><span class="p">(</span> |
| <a id="__codelineno-7-31" name="__codelineno-7-31" href="#__codelineno-7-31"></a> <span class="n">PartitionField</span><span class="p">(</span> |
| <a id="__codelineno-7-32" name="__codelineno-7-32" href="#__codelineno-7-32"></a> <span class="n">source_id</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">field_id</span><span class="o">=</span><span class="mi">1000</span><span class="p">,</span> <span class="n">transform</span><span class="o">=</span><span class="s2">"day"</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s2">"datetime_day"</span> |
| <a id="__codelineno-7-33" name="__codelineno-7-33" href="#__codelineno-7-33"></a> <span class="p">)</span> |
| <a id="__codelineno-7-34" name="__codelineno-7-34" href="#__codelineno-7-34"></a><span class="p">)</span> |
| <a id="__codelineno-7-35" name="__codelineno-7-35" href="#__codelineno-7-35"></a> |
| <a id="__codelineno-7-36" name="__codelineno-7-36" href="#__codelineno-7-36"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.table.sorting</span><span class="w"> </span><span class="kn">import</span> <span class="n">SortOrder</span><span class="p">,</span> <span class="n">SortField</span> |
| <a id="__codelineno-7-37" name="__codelineno-7-37" href="#__codelineno-7-37"></a> |
| <a id="__codelineno-7-38" name="__codelineno-7-38" href="#__codelineno-7-38"></a><span class="c1"># Sort on the symbol</span> |
| <a id="__codelineno-7-39" name="__codelineno-7-39" href="#__codelineno-7-39"></a><span class="n">sort_order</span> <span class="o">=</span> <span class="n">SortOrder</span><span class="p">(</span><span class="n">SortField</span><span class="p">(</span><span class="n">source_id</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">transform</span><span class="o">=</span><span class="s1">'identity'</span><span class="p">))</span> |
| <a id="__codelineno-7-40" name="__codelineno-7-40" href="#__codelineno-7-40"></a> |
| <a id="__codelineno-7-41" name="__codelineno-7-41" href="#__codelineno-7-41"></a><span class="n">catalog</span><span class="o">.</span><span class="n">create_table</span><span class="p">(</span> |
| <a id="__codelineno-7-42" name="__codelineno-7-42" href="#__codelineno-7-42"></a> <span class="n">identifier</span><span class="o">=</span><span class="s2">"docs_example.bids"</span><span class="p">,</span> |
| <a id="__codelineno-7-43" name="__codelineno-7-43" href="#__codelineno-7-43"></a> <span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">,</span> |
| <a id="__codelineno-7-44" name="__codelineno-7-44" href="#__codelineno-7-44"></a> <span class="n">partition_spec</span><span class="o">=</span><span class="n">partition_spec</span><span class="p">,</span> |
| <a id="__codelineno-7-45" name="__codelineno-7-45" href="#__codelineno-7-45"></a> <span class="n">sort_order</span><span class="o">=</span><span class="n">sort_order</span><span class="p">,</span> |
| <a id="__codelineno-7-46" name="__codelineno-7-46" href="#__codelineno-7-46"></a><span class="p">)</span> |
| </code></pre></div> |
| <p>When the table is created, all IDs in the schema are re-assigned to ensure uniqueness.</p> |
| <p>To create a table using a pyarrow schema:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-8-1" name="__codelineno-8-1" href="#__codelineno-8-1"></a><span class="kn">import</span><span class="w"> </span><span class="nn">pyarrow</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">pa</span> |
| <a id="__codelineno-8-2" name="__codelineno-8-2" href="#__codelineno-8-2"></a> |
| <a id="__codelineno-8-3" name="__codelineno-8-3" href="#__codelineno-8-3"></a><span class="n">schema</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">schema</span><span class="p">([</span> |
| <a id="__codelineno-8-4" name="__codelineno-8-4" href="#__codelineno-8-4"></a> <span class="n">pa</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s2">"foo"</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">string</span><span class="p">(),</span> <span class="n">nullable</span><span class="o">=</span><span class="kc">True</span><span class="p">),</span> |
| <a id="__codelineno-8-5" name="__codelineno-8-5" href="#__codelineno-8-5"></a> <span class="n">pa</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s2">"bar"</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">int32</span><span class="p">(),</span> <span class="n">nullable</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span> |
| <a id="__codelineno-8-6" name="__codelineno-8-6" href="#__codelineno-8-6"></a> <span class="n">pa</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s2">"baz"</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">bool_</span><span class="p">(),</span> <span class="n">nullable</span><span class="o">=</span><span class="kc">True</span><span class="p">),</span> |
| <a id="__codelineno-8-7" name="__codelineno-8-7" href="#__codelineno-8-7"></a><span class="p">])</span> |
| <a id="__codelineno-8-8" name="__codelineno-8-8" href="#__codelineno-8-8"></a> |
| <a id="__codelineno-8-9" name="__codelineno-8-9" href="#__codelineno-8-9"></a><span class="n">catalog</span><span class="o">.</span><span class="n">create_table</span><span class="p">(</span> |
| <a id="__codelineno-8-10" name="__codelineno-8-10" href="#__codelineno-8-10"></a> <span class="n">identifier</span><span class="o">=</span><span class="s2">"docs_example.bids"</span><span class="p">,</span> |
| <a id="__codelineno-8-11" name="__codelineno-8-11" href="#__codelineno-8-11"></a> <span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">,</span> |
| <a id="__codelineno-8-12" name="__codelineno-8-12" href="#__codelineno-8-12"></a><span class="p">)</span> |
| </code></pre></div> |
| <p>Another way to create a table is with <code>create_table_transaction</code>. It follows the same APIs that are used when making updates to a table. This is a friendly API for setting both the partition specification and the sort order, because you don't have to deal with field IDs.</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-9-1" name="__codelineno-9-1" href="#__codelineno-9-1"></a><span class="k">with</span> <span class="n">catalog</span><span class="o">.</span><span class="n">create_table_transaction</span><span class="p">(</span><span class="n">identifier</span><span class="o">=</span><span class="s2">"docs_example.bids"</span><span class="p">,</span> <span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">)</span> <span class="k">as</span> <span class="n">txn</span><span class="p">:</span> |
| <a id="__codelineno-9-2" name="__codelineno-9-2" href="#__codelineno-9-2"></a> <span class="k">with</span> <span class="n">txn</span><span class="o">.</span><span class="n">update_schema</span><span class="p">()</span> <span class="k">as</span> <span class="n">update_schema</span><span class="p">:</span> |
| <a id="__codelineno-9-3" name="__codelineno-9-3" href="#__codelineno-9-3"></a> <span class="n">update_schema</span><span class="o">.</span><span class="n">add_column</span><span class="p">(</span><span class="n">path</span><span class="o">=</span><span class="s2">"new_column"</span><span class="p">,</span> <span class="n">field_type</span><span class="o">=</span><span class="s1">'string'</span><span class="p">)</span> |
| <a id="__codelineno-9-4" name="__codelineno-9-4" href="#__codelineno-9-4"></a> |
| <a id="__codelineno-9-5" name="__codelineno-9-5" href="#__codelineno-9-5"></a> <span class="k">with</span> <span class="n">txn</span><span class="o">.</span><span class="n">update_spec</span><span class="p">()</span> <span class="k">as</span> <span class="n">update_spec</span><span class="p">:</span> |
| <a id="__codelineno-9-6" name="__codelineno-9-6" href="#__codelineno-9-6"></a> <span class="n">update_spec</span><span class="o">.</span><span class="n">add_identity</span><span class="p">(</span><span class="s2">"symbol"</span><span class="p">)</span> |
| <a id="__codelineno-9-7" name="__codelineno-9-7" href="#__codelineno-9-7"></a> |
| <a id="__codelineno-9-8" name="__codelineno-9-8" href="#__codelineno-9-8"></a> <span class="n">txn</span><span class="o">.</span><span class="n">set_properties</span><span class="p">(</span><span class="n">test_a</span><span class="o">=</span><span class="s2">"test_aa"</span><span class="p">,</span> <span class="n">test_b</span><span class="o">=</span><span class="s2">"test_b"</span><span class="p">,</span> <span class="n">test_c</span><span class="o">=</span><span class="s2">"test_c"</span><span class="p">)</span> |
| </code></pre></div> |
| <h2 id="register-a-table">Register a table<a class="headerlink" href="#register-a-table" title="Permanent link">¶</a></h2> |
| <p>To register a table using existing metadata:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-10-1" name="__codelineno-10-1" href="#__codelineno-10-1"></a><span class="n">catalog</span><span class="o">.</span><span class="n">register_table</span><span class="p">(</span> |
| <a id="__codelineno-10-2" name="__codelineno-10-2" href="#__codelineno-10-2"></a> <span class="n">identifier</span><span class="o">=</span><span class="s2">"docs_example.bids"</span><span class="p">,</span> |
| <a id="__codelineno-10-3" name="__codelineno-10-3" href="#__codelineno-10-3"></a> <span class="n">metadata_location</span><span class="o">=</span><span class="s2">"s3://warehouse/path/to/metadata.json"</span> |
| <a id="__codelineno-10-4" name="__codelineno-10-4" href="#__codelineno-10-4"></a><span class="p">)</span> |
| </code></pre></div> |
| <h2 id="load-a-table">Load a table<a class="headerlink" href="#load-a-table" title="Permanent link">¶</a></h2> |
| <p>There are two ways of reading an Iceberg table: through a catalog, or by pointing at the Iceberg metadata directly. Reading through a catalog is preferred; a table loaded by pointing directly at the metadata is read-only.</p> |
| <h3 id="catalog-table">Catalog table<a class="headerlink" href="#catalog-table" title="Permanent link">¶</a></h3> |
| <p>Loading the <code>bids</code> table:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-11-1" name="__codelineno-11-1" href="#__codelineno-11-1"></a><span class="n">table</span> <span class="o">=</span> <span class="n">catalog</span><span class="o">.</span><span class="n">load_table</span><span class="p">(</span><span class="s2">"docs_example.bids"</span><span class="p">)</span> |
| <a id="__codelineno-11-2" name="__codelineno-11-2" href="#__codelineno-11-2"></a><span class="c1"># Equivalent to:</span> |
| <a id="__codelineno-11-3" name="__codelineno-11-3" href="#__codelineno-11-3"></a><span class="n">table</span> <span class="o">=</span> <span class="n">catalog</span><span class="o">.</span><span class="n">load_table</span><span class="p">((</span><span class="s2">"docs_example"</span><span class="p">,</span> <span class="s2">"bids"</span><span class="p">))</span> |
| <a id="__codelineno-11-4" name="__codelineno-11-4" href="#__codelineno-11-4"></a><span class="c1"># The tuple syntax can be used if the namespace or table contains a dot.</span> |
| </code></pre></div> |
| <p>This returns a <code>Table</code> that represents an Iceberg table that can be queried and altered.</p> |
| <h3 id="static-table">Static table<a class="headerlink" href="#static-table" title="Permanent link">¶</a></h3> |
| <p>To load a table directly from a <code>metadata.json</code> file (i.e., <strong>without</strong> using a catalog), you can use a <code>StaticTable</code> as follows:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-12-1" name="__codelineno-12-1" href="#__codelineno-12-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.table</span><span class="w"> </span><span class="kn">import</span> <span class="n">StaticTable</span> |
| <a id="__codelineno-12-2" name="__codelineno-12-2" href="#__codelineno-12-2"></a> |
| <a id="__codelineno-12-3" name="__codelineno-12-3" href="#__codelineno-12-3"></a><span class="n">static_table</span> <span class="o">=</span> <span class="n">StaticTable</span><span class="o">.</span><span class="n">from_metadata</span><span class="p">(</span> |
| <a id="__codelineno-12-4" name="__codelineno-12-4" href="#__codelineno-12-4"></a> <span class="s2">"s3://warehouse/wh/nyc.db/taxis/metadata/00002-6ea51ce3-62aa-4197-9cf8-43d07c3440ca.metadata.json"</span> |
| <a id="__codelineno-12-5" name="__codelineno-12-5" href="#__codelineno-12-5"></a><span class="p">)</span> |
| </code></pre></div> |
| <p>The static table does not allow write operations. If your table metadata directory contains a <code>version-hint.text</code> file, you can just specify the table root path, and the latest <code>metadata.json</code> file will be resolved automatically:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-13-1" name="__codelineno-13-1" href="#__codelineno-13-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.table</span><span class="w"> </span><span class="kn">import</span> <span class="n">StaticTable</span> |
| <a id="__codelineno-13-2" name="__codelineno-13-2" href="#__codelineno-13-2"></a> |
| <a id="__codelineno-13-3" name="__codelineno-13-3" href="#__codelineno-13-3"></a><span class="n">static_table</span> <span class="o">=</span> <span class="n">StaticTable</span><span class="o">.</span><span class="n">from_metadata</span><span class="p">(</span> |
| <a id="__codelineno-13-4" name="__codelineno-13-4" href="#__codelineno-13-4"></a> <span class="s2">"s3://warehouse/wh/nyc.db/taxis"</span> |
| <a id="__codelineno-13-5" name="__codelineno-13-5" href="#__codelineno-13-5"></a><span class="p">)</span> |
| </code></pre></div> |
| <h2 id="check-if-a-table-exists">Check if a table exists<a class="headerlink" href="#check-if-a-table-exists" title="Permanent link">¶</a></h2> |
| <p>To check whether the <code>bids</code> table exists:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-14-1" name="__codelineno-14-1" href="#__codelineno-14-1"></a><span class="n">catalog</span><span class="o">.</span><span class="n">table_exists</span><span class="p">(</span><span class="s2">"docs_example.bids"</span><span class="p">)</span> |
| </code></pre></div> |
| <p>Returns <code>True</code> if the table already exists.</p> |
| <h2 id="rename-a-table">Rename a table<a class="headerlink" href="#rename-a-table" title="Permanent link">¶</a></h2> |
| <p>To rename a table:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-15-1" name="__codelineno-15-1" href="#__codelineno-15-1"></a><span class="n">catalog</span><span class="o">.</span><span class="n">rename_table</span><span class="p">(</span> |
| <a id="__codelineno-15-2" name="__codelineno-15-2" href="#__codelineno-15-2"></a> <span class="n">from_identifier</span><span class="o">=</span><span class="s2">"docs_example.bids"</span><span class="p">,</span> |
| <a id="__codelineno-15-3" name="__codelineno-15-3" href="#__codelineno-15-3"></a> <span class="n">to_identifier</span><span class="o">=</span><span class="s2">"docs_example.bids_backup"</span> |
| <a id="__codelineno-15-4" name="__codelineno-15-4" href="#__codelineno-15-4"></a><span class="p">)</span> |
| </code></pre></div> |
| <h2 id="drop-a-table">Drop a table<a class="headerlink" href="#drop-a-table" title="Permanent link">¶</a></h2> |
| <p>To drop a table:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-16-1" name="__codelineno-16-1" href="#__codelineno-16-1"></a><span class="n">catalog</span><span class="o">.</span><span class="n">drop_table</span><span class="p">(</span><span class="s2">"docs_example.bids"</span><span class="p">)</span> |
| </code></pre></div> |
| <p>To drop a table and purge all data and metadata files:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-17-1" name="__codelineno-17-1" href="#__codelineno-17-1"></a><span class="n">catalog</span><span class="o">.</span><span class="n">purge_table</span><span class="p">(</span><span class="s2">"docs_example.bids"</span><span class="p">)</span> |
| </code></pre></div> |
| <h2 id="write-to-a-table">Write to a table<a class="headerlink" href="#write-to-a-table" title="Permanent link">¶</a></h2> |
| <p>Reading and writing are done using <a href="https://arrow.apache.org/">Apache Arrow</a>. Arrow is an in-memory columnar format for fast data interchange and in-memory analytics. Let's consider the following Arrow Table:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-18-1" name="__codelineno-18-1" href="#__codelineno-18-1"></a><span class="kn">import</span><span class="w"> </span><span class="nn">pyarrow</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">pa</span> |
| <a id="__codelineno-18-2" name="__codelineno-18-2" href="#__codelineno-18-2"></a> |
| <a id="__codelineno-18-3" name="__codelineno-18-3" href="#__codelineno-18-3"></a><span class="n">df</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_pylist</span><span class="p">(</span> |
| <a id="__codelineno-18-4" name="__codelineno-18-4" href="#__codelineno-18-4"></a> <span class="p">[</span> |
| <a id="__codelineno-18-5" name="__codelineno-18-5" href="#__codelineno-18-5"></a> <span class="p">{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"Amsterdam"</span><span class="p">,</span> <span class="s2">"lat"</span><span class="p">:</span> <span class="mf">52.371807</span><span class="p">,</span> <span class="s2">"long"</span><span class="p">:</span> <span class="mf">4.896029</span><span class="p">},</span> |
| <a id="__codelineno-18-6" name="__codelineno-18-6" href="#__codelineno-18-6"></a> <span class="p">{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"San Francisco"</span><span class="p">,</span> <span class="s2">"lat"</span><span class="p">:</span> <span class="mf">37.773972</span><span class="p">,</span> <span class="s2">"long"</span><span class="p">:</span> <span class="o">-</span><span class="mf">122.431297</span><span class="p">},</span> |
| <a id="__codelineno-18-7" name="__codelineno-18-7" href="#__codelineno-18-7"></a> <span class="p">{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"Drachten"</span><span class="p">,</span> <span class="s2">"lat"</span><span class="p">:</span> <span class="mf">53.11254</span><span class="p">,</span> <span class="s2">"long"</span><span class="p">:</span> <span class="mf">6.0989</span><span class="p">},</span> |
| <a id="__codelineno-18-8" name="__codelineno-18-8" href="#__codelineno-18-8"></a> <span class="p">{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"Paris"</span><span class="p">,</span> <span class="s2">"lat"</span><span class="p">:</span> <span class="mf">48.864716</span><span class="p">,</span> <span class="s2">"long"</span><span class="p">:</span> <span class="mf">2.349014</span><span class="p">},</span> |
| <a id="__codelineno-18-9" name="__codelineno-18-9" href="#__codelineno-18-9"></a> <span class="p">],</span> |
| <a id="__codelineno-18-10" name="__codelineno-18-10" href="#__codelineno-18-10"></a><span class="p">)</span> |
| </code></pre></div> |
| <p>Next, create a table using the Arrow schema:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-19-1" name="__codelineno-19-1" href="#__codelineno-19-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.catalog</span><span class="w"> </span><span class="kn">import</span> <span class="n">load_catalog</span> |
| <a id="__codelineno-19-2" name="__codelineno-19-2" href="#__codelineno-19-2"></a> |
| <a id="__codelineno-19-3" name="__codelineno-19-3" href="#__codelineno-19-3"></a><span class="n">catalog</span> <span class="o">=</span> <span class="n">load_catalog</span><span class="p">(</span><span class="s2">"default"</span><span class="p">)</span> |
| <a id="__codelineno-19-4" name="__codelineno-19-4" href="#__codelineno-19-4"></a> |
| <a id="__codelineno-19-5" name="__codelineno-19-5" href="#__codelineno-19-5"></a><span class="n">tbl</span> <span class="o">=</span> <span class="n">catalog</span><span class="o">.</span><span class="n">create_table</span><span class="p">(</span><span class="s2">"default.cities"</span><span class="p">,</span> <span class="n">schema</span><span class="o">=</span><span class="n">df</span><span class="o">.</span><span class="n">schema</span><span class="p">)</span> |
| </code></pre></div> |
| <p>Next, write the data to the table. Both <code>append</code> and <code>overwrite</code> produce the same result, since the table is empty on creation:</p> |
| <!-- prettier-ignore-start --> |
| |
| <div class="admonition note inline end"> |
| <p class="admonition-title">Fast append</p> |
| <p>PyIceberg defaults to the <a href="https://iceberg.apache.org/spec/#snapshots">fast append</a> to minimize the amount of data written. This enables fast commit operations, reducing the possibility of conflicts. The downside of the fast append is that it creates more metadata than a merge commit. <a href="https://github.com/apache/iceberg-python/issues/270">Compaction is planned</a> and will automatically rewrite all the metadata when a threshold is hit, to maintain performant reads.</p> |
| </div> |
| <!-- prettier-ignore-end --> |
| |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-20-1" name="__codelineno-20-1" href="#__codelineno-20-1"></a><span class="n">tbl</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">df</span><span class="p">)</span> |
| <a id="__codelineno-20-2" name="__codelineno-20-2" href="#__codelineno-20-2"></a> |
| <a id="__codelineno-20-3" name="__codelineno-20-3" href="#__codelineno-20-3"></a><span class="c1"># or</span> |
| <a id="__codelineno-20-4" name="__codelineno-20-4" href="#__codelineno-20-4"></a> |
| <a id="__codelineno-20-5" name="__codelineno-20-5" href="#__codelineno-20-5"></a><span class="n">tbl</span><span class="o">.</span><span class="n">overwrite</span><span class="p">(</span><span class="n">df</span><span class="p">)</span> |
| </code></pre></div> |
| <p>Now, the data is written to the table, and the table can be read using <code>tbl.scan().to_arrow()</code>:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-21-1" name="__codelineno-21-1" href="#__codelineno-21-1"></a><span class="n">pyarrow</span><span class="o">.</span><span class="n">Table</span> |
| <a id="__codelineno-21-2" name="__codelineno-21-2" href="#__codelineno-21-2"></a><span class="n">city</span><span class="p">:</span> <span class="n">string</span> |
| <a id="__codelineno-21-3" name="__codelineno-21-3" href="#__codelineno-21-3"></a><span class="n">lat</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-21-4" name="__codelineno-21-4" href="#__codelineno-21-4"></a><span class="n">long</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-21-5" name="__codelineno-21-5" href="#__codelineno-21-5"></a><span class="o">----</span> |
| <a id="__codelineno-21-6" name="__codelineno-21-6" href="#__codelineno-21-6"></a><span class="n">city</span><span class="p">:</span> <span class="p">[[</span><span class="s2">"Amsterdam"</span><span class="p">,</span><span class="s2">"San Francisco"</span><span class="p">,</span><span class="s2">"Drachten"</span><span class="p">,</span><span class="s2">"Paris"</span><span class="p">]]</span> |
| <a id="__codelineno-21-7" name="__codelineno-21-7" href="#__codelineno-21-7"></a><span class="n">lat</span><span class="p">:</span> <span class="p">[[</span><span class="mf">52.371807</span><span class="p">,</span><span class="mf">37.773972</span><span class="p">,</span><span class="mf">53.11254</span><span class="p">,</span><span class="mf">48.864716</span><span class="p">]]</span> |
| <a id="__codelineno-21-8" name="__codelineno-21-8" href="#__codelineno-21-8"></a><span class="n">long</span><span class="p">:</span> <span class="p">[[</span><span class="mf">4.896029</span><span class="p">,</span><span class="o">-</span><span class="mf">122.431297</span><span class="p">,</span><span class="mf">6.0989</span><span class="p">,</span><span class="mf">2.349014</span><span class="p">]]</span> |
| </code></pre></div> |
| <p>If we want to add more data, we can use <code>.append()</code> again:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-22-1" name="__codelineno-22-1" href="#__codelineno-22-1"></a><span class="n">tbl</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_pylist</span><span class="p">(</span> |
| <a id="__codelineno-22-2" name="__codelineno-22-2" href="#__codelineno-22-2"></a> <span class="p">[{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"Groningen"</span><span class="p">,</span> <span class="s2">"lat"</span><span class="p">:</span> <span class="mf">53.21917</span><span class="p">,</span> <span class="s2">"long"</span><span class="p">:</span> <span class="mf">6.56667</span><span class="p">}],</span> |
| <a id="__codelineno-22-3" name="__codelineno-22-3" href="#__codelineno-22-3"></a><span class="p">))</span> |
| </code></pre></div> |
| <p>When reading the table with <code>tbl.scan().to_arrow()</code>, you can see that <code>Groningen</code> is now also part of the table:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-23-1" name="__codelineno-23-1" href="#__codelineno-23-1"></a><span class="n">pyarrow</span><span class="o">.</span><span class="n">Table</span> |
| <a id="__codelineno-23-2" name="__codelineno-23-2" href="#__codelineno-23-2"></a><span class="n">city</span><span class="p">:</span> <span class="n">string</span> |
| <a id="__codelineno-23-3" name="__codelineno-23-3" href="#__codelineno-23-3"></a><span class="n">lat</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-23-4" name="__codelineno-23-4" href="#__codelineno-23-4"></a><span class="n">long</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-23-5" name="__codelineno-23-5" href="#__codelineno-23-5"></a><span class="o">----</span> |
| <a id="__codelineno-23-6" name="__codelineno-23-6" href="#__codelineno-23-6"></a><span class="n">city</span><span class="p">:</span> <span class="p">[[</span><span class="s2">"Amsterdam"</span><span class="p">,</span><span class="s2">"San Francisco"</span><span class="p">,</span><span class="s2">"Drachten"</span><span class="p">,</span><span class="s2">"Paris"</span><span class="p">],[</span><span class="s2">"Groningen"</span><span class="p">]]</span> |
| <a id="__codelineno-23-7" name="__codelineno-23-7" href="#__codelineno-23-7"></a><span class="n">lat</span><span class="p">:</span> <span class="p">[[</span><span class="mf">52.371807</span><span class="p">,</span><span class="mf">37.773972</span><span class="p">,</span><span class="mf">53.11254</span><span class="p">,</span><span class="mf">48.864716</span><span class="p">],[</span><span class="mf">53.21917</span><span class="p">]]</span> |
| <a id="__codelineno-23-8" name="__codelineno-23-8" href="#__codelineno-23-8"></a><span class="n">long</span><span class="p">:</span> <span class="p">[[</span><span class="mf">4.896029</span><span class="p">,</span><span class="o">-</span><span class="mf">122.431297</span><span class="p">,</span><span class="mf">6.0989</span><span class="p">,</span><span class="mf">2.349014</span><span class="p">],[</span><span class="mf">6.56667</span><span class="p">]]</span> |
| </code></pre></div> |
| <p>The nested lists indicate the different Arrow buffers. Each of the writes produces a <a href="https://parquet.apache.org/">Parquet file</a> where each <a href="https://parquet.apache.org/docs/concepts/">row group</a> translates into an Arrow buffer. In the case where the table is large, PyIceberg also allows the option to stream the buffers using the Arrow <a href="https://arrow.apache.org/docs/python/generated/pyarrow.RecordBatchReader.html">RecordBatchReader</a>, avoiding pulling everything into memory right away:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-24-1" name="__codelineno-24-1" href="#__codelineno-24-1"></a><span class="k">for</span> <span class="n">buf</span> <span class="ow">in</span> <span class="n">tbl</span><span class="o">.</span><span class="n">scan</span><span class="p">()</span><span class="o">.</span><span class="n">to_arrow_batch_reader</span><span class="p">():</span> |
| <a id="__codelineno-24-2" name="__codelineno-24-2" href="#__codelineno-24-2"></a> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Buffer contains </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">buf</span><span class="p">)</span><span class="si">}</span><span class="s2"> rows"</span><span class="p">)</span> |
| </code></pre></div> |
| <p>To avoid any type inconsistencies during writing, you can convert the Iceberg table schema to Arrow:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-25-1" name="__codelineno-25-1" href="#__codelineno-25-1"></a><span class="n">df</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_pylist</span><span class="p">(</span> |
| <a id="__codelineno-25-2" name="__codelineno-25-2" href="#__codelineno-25-2"></a> <span class="p">[{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"Groningen"</span><span class="p">,</span> <span class="s2">"lat"</span><span class="p">:</span> <span class="mf">53.21917</span><span class="p">,</span> <span class="s2">"long"</span><span class="p">:</span> <span class="mf">6.56667</span><span class="p">}],</span> <span class="n">schema</span><span class="o">=</span><span class="n">tbl</span><span class="o">.</span><span class="n">schema</span><span class="p">()</span><span class="o">.</span><span class="n">as_arrow</span><span class="p">()</span> |
| <a id="__codelineno-25-3" name="__codelineno-25-3" href="#__codelineno-25-3"></a><span class="p">)</span> |
| <a id="__codelineno-25-4" name="__codelineno-25-4" href="#__codelineno-25-4"></a> |
| <a id="__codelineno-25-5" name="__codelineno-25-5" href="#__codelineno-25-5"></a><span class="n">tbl</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">df</span><span class="p">)</span> |
| </code></pre></div> |
| <p>You can delete some of the data from the table by calling <code>tbl.delete()</code> with a desired <code>delete_filter</code>. This will use the Iceberg metadata to only open up the Parquet files that contain relevant information.</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-26-1" name="__codelineno-26-1" href="#__codelineno-26-1"></a><span class="n">tbl</span><span class="o">.</span><span class="n">delete</span><span class="p">(</span><span class="n">delete_filter</span><span class="o">=</span><span class="s2">"city == 'Paris'"</span><span class="p">)</span> |
| </code></pre></div> |
| <p>In the above example, any records where the city field value equals <code>Paris</code> will be deleted. Running <code>tbl.scan().to_arrow()</code> will now yield:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-27-1" name="__codelineno-27-1" href="#__codelineno-27-1"></a><span class="n">pyarrow</span><span class="o">.</span><span class="n">Table</span> |
| <a id="__codelineno-27-2" name="__codelineno-27-2" href="#__codelineno-27-2"></a><span class="n">city</span><span class="p">:</span> <span class="n">string</span> |
| <a id="__codelineno-27-3" name="__codelineno-27-3" href="#__codelineno-27-3"></a><span class="n">lat</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-27-4" name="__codelineno-27-4" href="#__codelineno-27-4"></a><span class="n">long</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-27-5" name="__codelineno-27-5" href="#__codelineno-27-5"></a><span class="o">----</span> |
| <a id="__codelineno-27-6" name="__codelineno-27-6" href="#__codelineno-27-6"></a><span class="n">city</span><span class="p">:</span> <span class="p">[[</span><span class="s2">"Amsterdam"</span><span class="p">,</span><span class="s2">"San Francisco"</span><span class="p">,</span><span class="s2">"Drachten"</span><span class="p">],[</span><span class="s2">"Groningen"</span><span class="p">]]</span> |
| <a id="__codelineno-27-7" name="__codelineno-27-7" href="#__codelineno-27-7"></a><span class="n">lat</span><span class="p">:</span> <span class="p">[[</span><span class="mf">52.371807</span><span class="p">,</span><span class="mf">37.773972</span><span class="p">,</span><span class="mf">53.11254</span><span class="p">],[</span><span class="mf">53.21917</span><span class="p">]]</span> |
| <a id="__codelineno-27-8" name="__codelineno-27-8" href="#__codelineno-27-8"></a><span class="n">long</span><span class="p">:</span> <span class="p">[[</span><span class="mf">4.896029</span><span class="p">,</span><span class="o">-</span><span class="mf">122.431297</span><span class="p">,</span><span class="mf">6.0989</span><span class="p">],[</span><span class="mf">6.56667</span><span class="p">]]</span> |
| </code></pre></div> |
| <p>In the case of <code>tbl.delete(delete_filter="city == 'Groningen'")</code>, the whole Parquet file will be dropped without checking its contents, since from the Iceberg metadata PyIceberg can derive that all the content in the file matches the predicate.</p> |
| <h3 id="partial-overwrites">Partial overwrites<a class="headerlink" href="#partial-overwrites" title="Permanent link">¶</a></h3> |
| <p>When using the <code>overwrite</code> API, you can use an <code>overwrite_filter</code> to delete data that matches the filter before appending new data into the table. For example, consider the following Iceberg table:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-28-1" name="__codelineno-28-1" href="#__codelineno-28-1"></a><span class="kn">import</span><span class="w"> </span><span class="nn">pyarrow</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">pa</span> |
| <a id="__codelineno-28-2" name="__codelineno-28-2" href="#__codelineno-28-2"></a><span class="n">df</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_pylist</span><span class="p">(</span> |
| <a id="__codelineno-28-3" name="__codelineno-28-3" href="#__codelineno-28-3"></a> <span class="p">[</span> |
| <a id="__codelineno-28-4" name="__codelineno-28-4" href="#__codelineno-28-4"></a> <span class="p">{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"Amsterdam"</span><span class="p">,</span> <span class="s2">"lat"</span><span class="p">:</span> <span class="mf">52.371807</span><span class="p">,</span> <span class="s2">"long"</span><span class="p">:</span> <span class="mf">4.896029</span><span class="p">},</span> |
| <a id="__codelineno-28-5" name="__codelineno-28-5" href="#__codelineno-28-5"></a> <span class="p">{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"San Francisco"</span><span class="p">,</span> <span class="s2">"lat"</span><span class="p">:</span> <span class="mf">37.773972</span><span class="p">,</span> <span class="s2">"long"</span><span class="p">:</span> <span class="o">-</span><span class="mf">122.431297</span><span class="p">},</span> |
| <a id="__codelineno-28-6" name="__codelineno-28-6" href="#__codelineno-28-6"></a> <span class="p">{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"Drachten"</span><span class="p">,</span> <span class="s2">"lat"</span><span class="p">:</span> <span class="mf">53.11254</span><span class="p">,</span> <span class="s2">"long"</span><span class="p">:</span> <span class="mf">6.0989</span><span class="p">},</span> |
| <a id="__codelineno-28-7" name="__codelineno-28-7" href="#__codelineno-28-7"></a> <span class="p">{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"Paris"</span><span class="p">,</span> <span class="s2">"lat"</span><span class="p">:</span> <span class="mf">48.864716</span><span class="p">,</span> <span class="s2">"long"</span><span class="p">:</span> <span class="mf">2.349014</span><span class="p">},</span> |
| <a id="__codelineno-28-8" name="__codelineno-28-8" href="#__codelineno-28-8"></a> <span class="p">],</span> |
| <a id="__codelineno-28-9" name="__codelineno-28-9" href="#__codelineno-28-9"></a><span class="p">)</span> |
| <a id="__codelineno-28-10" name="__codelineno-28-10" href="#__codelineno-28-10"></a> |
| <a id="__codelineno-28-11" name="__codelineno-28-11" href="#__codelineno-28-11"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.catalog</span><span class="w"> </span><span class="kn">import</span> <span class="n">load_catalog</span> |
| <a id="__codelineno-28-12" name="__codelineno-28-12" href="#__codelineno-28-12"></a><span class="n">catalog</span> <span class="o">=</span> <span class="n">load_catalog</span><span class="p">(</span><span class="s2">"default"</span><span class="p">)</span> |
| <a id="__codelineno-28-13" name="__codelineno-28-13" href="#__codelineno-28-13"></a> |
| <a id="__codelineno-28-14" name="__codelineno-28-14" href="#__codelineno-28-14"></a><span class="n">tbl</span> <span class="o">=</span> <span class="n">catalog</span><span class="o">.</span><span class="n">create_table</span><span class="p">(</span><span class="s2">"default.cities"</span><span class="p">,</span> <span class="n">schema</span><span class="o">=</span><span class="n">df</span><span class="o">.</span><span class="n">schema</span><span class="p">)</span> |
| <a id="__codelineno-28-15" name="__codelineno-28-15" href="#__codelineno-28-15"></a> |
| <a id="__codelineno-28-16" name="__codelineno-28-16" href="#__codelineno-28-16"></a><span class="n">tbl</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">df</span><span class="p">)</span> |
| </code></pre></div> |
| <p>You can overwrite the record of <code>Paris</code> with a record of <code>New York</code>:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-29-1" name="__codelineno-29-1" href="#__codelineno-29-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.expressions</span><span class="w"> </span><span class="kn">import</span> <span class="n">EqualTo</span> |
| <a id="__codelineno-29-2" name="__codelineno-29-2" href="#__codelineno-29-2"></a><span class="n">df</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_pylist</span><span class="p">(</span> |
| <a id="__codelineno-29-3" name="__codelineno-29-3" href="#__codelineno-29-3"></a> <span class="p">[</span> |
| <a id="__codelineno-29-4" name="__codelineno-29-4" href="#__codelineno-29-4"></a> <span class="p">{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"New York"</span><span class="p">,</span> <span class="s2">"lat"</span><span class="p">:</span> <span class="mf">40.7128</span><span class="p">,</span> <span class="s2">"long"</span><span class="p">:</span> <span class="mf">74.0060</span><span class="p">},</span> |
| <a id="__codelineno-29-5" name="__codelineno-29-5" href="#__codelineno-29-5"></a> <span class="p">]</span> |
| <a id="__codelineno-29-6" name="__codelineno-29-6" href="#__codelineno-29-6"></a><span class="p">)</span> |
| <a id="__codelineno-29-7" name="__codelineno-29-7" href="#__codelineno-29-7"></a><span class="n">tbl</span><span class="o">.</span><span class="n">overwrite</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">overwrite_filter</span><span class="o">=</span><span class="n">EqualTo</span><span class="p">(</span><span class="s1">'city'</span><span class="p">,</span> <span class="s2">"Paris"</span><span class="p">))</span> |
| </code></pre></div> |
| <p>This produces the following result with <code>tbl.scan().to_arrow()</code>:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-30-1" name="__codelineno-30-1" href="#__codelineno-30-1"></a><span class="n">pyarrow</span><span class="o">.</span><span class="n">Table</span> |
| <a id="__codelineno-30-2" name="__codelineno-30-2" href="#__codelineno-30-2"></a><span class="n">city</span><span class="p">:</span> <span class="n">large_string</span> |
| <a id="__codelineno-30-3" name="__codelineno-30-3" href="#__codelineno-30-3"></a><span class="n">lat</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-30-4" name="__codelineno-30-4" href="#__codelineno-30-4"></a><span class="n">long</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-30-5" name="__codelineno-30-5" href="#__codelineno-30-5"></a><span class="o">----</span> |
| <a id="__codelineno-30-6" name="__codelineno-30-6" href="#__codelineno-30-6"></a><span class="n">city</span><span class="p">:</span> <span class="p">[[</span><span class="s2">"New York"</span><span class="p">],[</span><span class="s2">"Amsterdam"</span><span class="p">,</span><span class="s2">"San Francisco"</span><span class="p">,</span><span class="s2">"Drachten"</span><span class="p">]]</span> |
| <a id="__codelineno-30-7" name="__codelineno-30-7" href="#__codelineno-30-7"></a><span class="n">lat</span><span class="p">:</span> <span class="p">[[</span><span class="mf">40.7128</span><span class="p">],[</span><span class="mf">52.371807</span><span class="p">,</span><span class="mf">37.773972</span><span class="p">,</span><span class="mf">53.11254</span><span class="p">]]</span> |
| <a id="__codelineno-30-8" name="__codelineno-30-8" href="#__codelineno-30-8"></a><span class="n">long</span><span class="p">:</span> <span class="p">[[</span><span class="mf">74.006</span><span class="p">],[</span><span class="mf">4.896029</span><span class="p">,</span><span class="o">-</span><span class="mf">122.431297</span><span class="p">,</span><span class="mf">6.0989</span><span class="p">]]</span> |
| </code></pre></div> |
| <p>If the PyIceberg table is partitioned, you can use <code>tbl.dynamic_partition_overwrite(df)</code> to replace the existing partitions with new ones provided in the dataframe. The partitions to be replaced are detected automatically from the provided Arrow table. |
| For example, with an Iceberg table partitioned on the <code>"city"</code> field:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-31-1" name="__codelineno-31-1" href="#__codelineno-31-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.schema</span><span class="w"> </span><span class="kn">import</span> <span class="n">Schema</span> |
| <a id="__codelineno-31-2" name="__codelineno-31-2" href="#__codelineno-31-2"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.types</span><span class="w"> </span><span class="kn">import</span> <span class="n">DoubleType</span><span class="p">,</span> <span class="n">NestedField</span><span class="p">,</span> <span class="n">StringType</span> |
| <a id="__codelineno-31-3" name="__codelineno-31-3" href="#__codelineno-31-3"></a> |
| <a id="__codelineno-31-4" name="__codelineno-31-4" href="#__codelineno-31-4"></a><span class="n">schema</span> <span class="o">=</span> <span class="n">Schema</span><span class="p">(</span> |
| <a id="__codelineno-31-5" name="__codelineno-31-5" href="#__codelineno-31-5"></a> <span class="n">NestedField</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="s2">"city"</span><span class="p">,</span> <span class="n">StringType</span><span class="p">(),</span> <span class="n">required</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span> |
| <a id="__codelineno-31-6" name="__codelineno-31-6" href="#__codelineno-31-6"></a> <span class="n">NestedField</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="s2">"lat"</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">(),</span> <span class="n">required</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span> |
| <a id="__codelineno-31-7" name="__codelineno-31-7" href="#__codelineno-31-7"></a> <span class="n">NestedField</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="s2">"long"</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">(),</span> <span class="n">required</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span> |
| <a id="__codelineno-31-8" name="__codelineno-31-8" href="#__codelineno-31-8"></a><span class="p">)</span> |
| <a id="__codelineno-31-9" name="__codelineno-31-9" href="#__codelineno-31-9"></a> |
| <a id="__codelineno-31-10" name="__codelineno-31-10" href="#__codelineno-31-10"></a><span class="n">tbl</span> <span class="o">=</span> <span class="n">catalog</span><span class="o">.</span><span class="n">create_table</span><span class="p">(</span> |
| <a id="__codelineno-31-11" name="__codelineno-31-11" href="#__codelineno-31-11"></a> <span class="s2">"default.cities"</span><span class="p">,</span> |
| <a id="__codelineno-31-12" name="__codelineno-31-12" href="#__codelineno-31-12"></a> <span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">,</span> |
| <a id="__codelineno-31-13" name="__codelineno-31-13" href="#__codelineno-31-13"></a> <span class="n">partition_spec</span><span class="o">=</span><span class="n">PartitionSpec</span><span class="p">(</span><span class="n">PartitionField</span><span class="p">(</span><span class="n">source_id</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">field_id</span><span class="o">=</span><span class="mi">1001</span><span class="p">,</span> <span class="n">transform</span><span class="o">=</span><span class="n">IdentityTransform</span><span class="p">(),</span> <span class="n">name</span><span class="o">=</span><span class="s2">"city_identity"</span><span class="p">))</span> |
| <a id="__codelineno-31-14" name="__codelineno-31-14" href="#__codelineno-31-14"></a><span class="p">)</span> |
| </code></pre></div> |
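| <p>Suppose the table was first populated with the following data, in which the coordinates for <code>"Paris"</code> are wrong (note the negative signs), and we want to overwrite the data for the partition of <code>"Paris"</code>:</p> |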
| <div class="highlight"><pre><span></span><code><a id="__codelineno-32-1" name="__codelineno-32-1" href="#__codelineno-32-1"></a><span class="kn">import</span><span class="w"> </span><span class="nn">pyarrow</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">pa</span> |
| <a id="__codelineno-32-2" name="__codelineno-32-2" href="#__codelineno-32-2"></a> |
| <a id="__codelineno-32-3" name="__codelineno-32-3" href="#__codelineno-32-3"></a><span class="n">df</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_pylist</span><span class="p">(</span> |
| <a id="__codelineno-32-4" name="__codelineno-32-4" href="#__codelineno-32-4"></a> <span class="p">[</span> |
| <a id="__codelineno-32-5" name="__codelineno-32-5" href="#__codelineno-32-5"></a> <span class="p">{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"Amsterdam"</span><span class="p">,</span> <span class="s2">"lat"</span><span class="p">:</span> <span class="mf">52.371807</span><span class="p">,</span> <span class="s2">"long"</span><span class="p">:</span> <span class="mf">4.896029</span><span class="p">},</span> |
| <a id="__codelineno-32-6" name="__codelineno-32-6" href="#__codelineno-32-6"></a> <span class="p">{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"San Francisco"</span><span class="p">,</span> <span class="s2">"lat"</span><span class="p">:</span> <span class="mf">37.773972</span><span class="p">,</span> <span class="s2">"long"</span><span class="p">:</span> <span class="o">-</span><span class="mf">122.431297</span><span class="p">},</span> |
| <a id="__codelineno-32-7" name="__codelineno-32-7" href="#__codelineno-32-7"></a> <span class="p">{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"Drachten"</span><span class="p">,</span> <span class="s2">"lat"</span><span class="p">:</span> <span class="mf">53.11254</span><span class="p">,</span> <span class="s2">"long"</span><span class="p">:</span> <span class="mf">6.0989</span><span class="p">},</span> |
| <a id="__codelineno-32-8" name="__codelineno-32-8" href="#__codelineno-32-8"></a> <span class="p">{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"Paris"</span><span class="p">,</span> <span class="s2">"lat"</span><span class="p">:</span> <span class="o">-</span><span class="mf">48.864716</span><span class="p">,</span> <span class="s2">"long"</span><span class="p">:</span> <span class="o">-</span><span class="mf">2.349014</span><span class="p">},</span> |
| <a id="__codelineno-32-9" name="__codelineno-32-9" href="#__codelineno-32-9"></a> <span class="p">],</span> |
| <a id="__codelineno-32-10" name="__codelineno-32-10" href="#__codelineno-32-10"></a><span class="p">)</span> |
| <a id="__codelineno-32-11" name="__codelineno-32-11" href="#__codelineno-32-11"></a><span class="n">tbl</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">df</span><span class="p">)</span> |
| </code></pre></div> |
| <p>Then we can call <code>dynamic_partition_overwrite</code> with an Arrow table that contains the corrected record:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-33-1" name="__codelineno-33-1" href="#__codelineno-33-1"></a><span class="n">df_corrected</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_pylist</span><span class="p">([</span> |
| <a id="__codelineno-33-2" name="__codelineno-33-2" href="#__codelineno-33-2"></a> <span class="p">{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"Paris"</span><span class="p">,</span> <span class="s2">"lat"</span><span class="p">:</span> <span class="mf">48.864716</span><span class="p">,</span> <span class="s2">"long"</span><span class="p">:</span> <span class="mf">2.349014</span><span class="p">}</span> |
| <a id="__codelineno-33-3" name="__codelineno-33-3" href="#__codelineno-33-3"></a><span class="p">])</span> |
| <a id="__codelineno-33-4" name="__codelineno-33-4" href="#__codelineno-33-4"></a><span class="n">tbl</span><span class="o">.</span><span class="n">dynamic_partition_overwrite</span><span class="p">(</span><span class="n">df_corrected</span><span class="p">)</span> |
| </code></pre></div> |
| <p>This produces the following result with <code>tbl.scan().to_arrow()</code>:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-34-1" name="__codelineno-34-1" href="#__codelineno-34-1"></a><span class="n">pyarrow</span><span class="o">.</span><span class="n">Table</span> |
| <a id="__codelineno-34-2" name="__codelineno-34-2" href="#__codelineno-34-2"></a><span class="n">city</span><span class="p">:</span> <span class="n">large_string</span> |
| <a id="__codelineno-34-3" name="__codelineno-34-3" href="#__codelineno-34-3"></a><span class="n">lat</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-34-4" name="__codelineno-34-4" href="#__codelineno-34-4"></a><span class="n">long</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-34-5" name="__codelineno-34-5" href="#__codelineno-34-5"></a><span class="o">----</span> |
| <a id="__codelineno-34-6" name="__codelineno-34-6" href="#__codelineno-34-6"></a><span class="n">city</span><span class="p">:</span> <span class="p">[[</span><span class="s2">"Paris"</span><span class="p">],[</span><span class="s2">"Amsterdam"</span><span class="p">],[</span><span class="s2">"Drachten"</span><span class="p">],[</span><span class="s2">"San Francisco"</span><span class="p">]]</span> |
| <a id="__codelineno-34-7" name="__codelineno-34-7" href="#__codelineno-34-7"></a><span class="n">lat</span><span class="p">:</span> <span class="p">[[</span><span class="mf">48.864716</span><span class="p">],[</span><span class="mf">52.371807</span><span class="p">],[</span><span class="mf">53.11254</span><span class="p">],[</span><span class="mf">37.773972</span><span class="p">]]</span> |
| <a id="__codelineno-34-8" name="__codelineno-34-8" href="#__codelineno-34-8"></a><span class="n">long</span><span class="p">:</span> <span class="p">[[</span><span class="mf">2.349014</span><span class="p">],[</span><span class="mf">4.896029</span><span class="p">],[</span><span class="mf">6.0989</span><span class="p">],[</span><span class="o">-</span><span class="mf">122.431297</span><span class="p">]]</span> |
| </code></pre></div> |
| <h3 id="upsert">Upsert<a class="headerlink" href="#upsert" title="Permanent link">¶</a></h3> |
| <p>PyIceberg supports upsert operations, meaning that it is able to merge an Arrow table into an Iceberg table. Rows are considered the same based on the <a href="https://iceberg.apache.org/spec/?column-projection#identifier-field-ids">identifier field</a>. If a row is already in the table, it will update that row. If a row cannot be found, it will insert that new row.</p> |
| <p>Consider the following table, with some data:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-35-1" name="__codelineno-35-1" href="#__codelineno-35-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.schema</span><span class="w"> </span><span class="kn">import</span> <span class="n">Schema</span> |
| <a id="__codelineno-35-2" name="__codelineno-35-2" href="#__codelineno-35-2"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.types</span><span class="w"> </span><span class="kn">import</span> <span class="n">IntegerType</span><span class="p">,</span> <span class="n">NestedField</span><span class="p">,</span> <span class="n">StringType</span> |
| <a id="__codelineno-35-3" name="__codelineno-35-3" href="#__codelineno-35-3"></a> |
| <a id="__codelineno-35-4" name="__codelineno-35-4" href="#__codelineno-35-4"></a><span class="kn">import</span><span class="w"> </span><span class="nn">pyarrow</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">pa</span> |
| <a id="__codelineno-35-5" name="__codelineno-35-5" href="#__codelineno-35-5"></a> |
| <a id="__codelineno-35-6" name="__codelineno-35-6" href="#__codelineno-35-6"></a><span class="n">schema</span> <span class="o">=</span> <span class="n">Schema</span><span class="p">(</span> |
| <a id="__codelineno-35-7" name="__codelineno-35-7" href="#__codelineno-35-7"></a> <span class="n">NestedField</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="s2">"city"</span><span class="p">,</span> <span class="n">StringType</span><span class="p">(),</span> <span class="n">required</span><span class="o">=</span><span class="kc">True</span><span class="p">),</span> |
| <a id="__codelineno-35-8" name="__codelineno-35-8" href="#__codelineno-35-8"></a> <span class="n">NestedField</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="s2">"inhabitants"</span><span class="p">,</span> <span class="n">IntegerType</span><span class="p">(),</span> <span class="n">required</span><span class="o">=</span><span class="kc">True</span><span class="p">),</span> |
| <a id="__codelineno-35-9" name="__codelineno-35-9" href="#__codelineno-35-9"></a> <span class="c1"># Mark City as the identifier field, also known as the primary-key</span> |
| <a id="__codelineno-35-10" name="__codelineno-35-10" href="#__codelineno-35-10"></a> <span class="n">identifier_field_ids</span><span class="o">=</span><span class="p">[</span><span class="mi">1</span><span class="p">]</span> |
| <a id="__codelineno-35-11" name="__codelineno-35-11" href="#__codelineno-35-11"></a><span class="p">)</span> |
| <a id="__codelineno-35-12" name="__codelineno-35-12" href="#__codelineno-35-12"></a> |
| <a id="__codelineno-35-13" name="__codelineno-35-13" href="#__codelineno-35-13"></a><span class="n">tbl</span> <span class="o">=</span> <span class="n">catalog</span><span class="o">.</span><span class="n">create_table</span><span class="p">(</span><span class="s2">"default.cities"</span><span class="p">,</span> <span class="n">schema</span><span class="o">=</span><span class="n">schema</span><span class="p">)</span> |
| <a id="__codelineno-35-14" name="__codelineno-35-14" href="#__codelineno-35-14"></a> |
| <a id="__codelineno-35-15" name="__codelineno-35-15" href="#__codelineno-35-15"></a><span class="n">arrow_schema</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">schema</span><span class="p">(</span> |
| <a id="__codelineno-35-16" name="__codelineno-35-16" href="#__codelineno-35-16"></a> <span class="p">[</span> |
| <a id="__codelineno-35-17" name="__codelineno-35-17" href="#__codelineno-35-17"></a> <span class="n">pa</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s2">"city"</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">string</span><span class="p">(),</span> <span class="n">nullable</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span> |
| <a id="__codelineno-35-18" name="__codelineno-35-18" href="#__codelineno-35-18"></a> <span class="n">pa</span><span class="o">.</span><span class="n">field</span><span class="p">(</span><span class="s2">"inhabitants"</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">int32</span><span class="p">(),</span> <span class="n">nullable</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span> |
| <a id="__codelineno-35-19" name="__codelineno-35-19" href="#__codelineno-35-19"></a> <span class="p">]</span> |
| <a id="__codelineno-35-20" name="__codelineno-35-20" href="#__codelineno-35-20"></a><span class="p">)</span> |
| <a id="__codelineno-35-21" name="__codelineno-35-21" href="#__codelineno-35-21"></a> |
| <a id="__codelineno-35-22" name="__codelineno-35-22" href="#__codelineno-35-22"></a><span class="c1"># Write some data</span> |
| <a id="__codelineno-35-23" name="__codelineno-35-23" href="#__codelineno-35-23"></a><span class="n">df</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_pylist</span><span class="p">(</span> |
| <a id="__codelineno-35-24" name="__codelineno-35-24" href="#__codelineno-35-24"></a> <span class="p">[</span> |
| <a id="__codelineno-35-25" name="__codelineno-35-25" href="#__codelineno-35-25"></a> <span class="p">{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"Amsterdam"</span><span class="p">,</span> <span class="s2">"inhabitants"</span><span class="p">:</span> <span class="mi">921402</span><span class="p">},</span> |
| <a id="__codelineno-35-26" name="__codelineno-35-26" href="#__codelineno-35-26"></a> <span class="p">{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"San Francisco"</span><span class="p">,</span> <span class="s2">"inhabitants"</span><span class="p">:</span> <span class="mi">808988</span><span class="p">},</span> |
| <a id="__codelineno-35-27" name="__codelineno-35-27" href="#__codelineno-35-27"></a> <span class="p">{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"Drachten"</span><span class="p">,</span> <span class="s2">"inhabitants"</span><span class="p">:</span> <span class="mi">45019</span><span class="p">},</span> |
| <a id="__codelineno-35-28" name="__codelineno-35-28" href="#__codelineno-35-28"></a> <span class="p">{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"Paris"</span><span class="p">,</span> <span class="s2">"inhabitants"</span><span class="p">:</span> <span class="mi">2103000</span><span class="p">},</span> |
| <a id="__codelineno-35-29" name="__codelineno-35-29" href="#__codelineno-35-29"></a> <span class="p">],</span> |
| <a id="__codelineno-35-30" name="__codelineno-35-30" href="#__codelineno-35-30"></a> <span class="n">schema</span><span class="o">=</span><span class="n">arrow_schema</span> |
| <a id="__codelineno-35-31" name="__codelineno-35-31" href="#__codelineno-35-31"></a><span class="p">)</span> |
| <a id="__codelineno-35-32" name="__codelineno-35-32" href="#__codelineno-35-32"></a><span class="n">tbl</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">df</span><span class="p">)</span> |
| </code></pre></div> |
| <p>Next, we'll upsert an Arrow table into the Iceberg table:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-36-1" name="__codelineno-36-1" href="#__codelineno-36-1"></a><span class="n">df</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_pylist</span><span class="p">(</span> |
| <a id="__codelineno-36-2" name="__codelineno-36-2" href="#__codelineno-36-2"></a> <span class="p">[</span> |
| <a id="__codelineno-36-3" name="__codelineno-36-3" href="#__codelineno-36-3"></a> <span class="c1"># Will be updated, the inhabitants has been updated</span> |
| <a id="__codelineno-36-4" name="__codelineno-36-4" href="#__codelineno-36-4"></a> <span class="p">{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"Drachten"</span><span class="p">,</span> <span class="s2">"inhabitants"</span><span class="p">:</span> <span class="mi">45505</span><span class="p">},</span> |
| <a id="__codelineno-36-5" name="__codelineno-36-5" href="#__codelineno-36-5"></a> |
| <a id="__codelineno-36-6" name="__codelineno-36-6" href="#__codelineno-36-6"></a> <span class="c1"># New row, will be inserted</span> |
| <a id="__codelineno-36-7" name="__codelineno-36-7" href="#__codelineno-36-7"></a> <span class="p">{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"Berlin"</span><span class="p">,</span> <span class="s2">"inhabitants"</span><span class="p">:</span> <span class="mi">3432000</span><span class="p">},</span> |
| <a id="__codelineno-36-8" name="__codelineno-36-8" href="#__codelineno-36-8"></a> |
| <a id="__codelineno-36-9" name="__codelineno-36-9" href="#__codelineno-36-9"></a> <span class="c1"># Ignored, already exists in the table</span> |
| <a id="__codelineno-36-10" name="__codelineno-36-10" href="#__codelineno-36-10"></a> <span class="p">{</span><span class="s2">"city"</span><span class="p">:</span> <span class="s2">"Paris"</span><span class="p">,</span> <span class="s2">"inhabitants"</span><span class="p">:</span> <span class="mi">2103000</span><span class="p">},</span> |
| <a id="__codelineno-36-11" name="__codelineno-36-11" href="#__codelineno-36-11"></a> <span class="p">],</span> |
| <a id="__codelineno-36-12" name="__codelineno-36-12" href="#__codelineno-36-12"></a> <span class="n">schema</span><span class="o">=</span><span class="n">arrow_schema</span> |
| <a id="__codelineno-36-13" name="__codelineno-36-13" href="#__codelineno-36-13"></a><span class="p">)</span> |
| <a id="__codelineno-36-14" name="__codelineno-36-14" href="#__codelineno-36-14"></a><span class="n">upd</span> <span class="o">=</span> <span class="n">tbl</span><span class="o">.</span><span class="n">upsert</span><span class="p">(</span><span class="n">df</span><span class="p">)</span> |
| <a id="__codelineno-36-15" name="__codelineno-36-15" href="#__codelineno-36-15"></a> |
| <a id="__codelineno-36-16" name="__codelineno-36-16" href="#__codelineno-36-16"></a><span class="k">assert</span> <span class="n">upd</span><span class="o">.</span><span class="n">rows_updated</span> <span class="o">==</span> <span class="mi">1</span> |
| <a id="__codelineno-36-17" name="__codelineno-36-17" href="#__codelineno-36-17"></a><span class="k">assert</span> <span class="n">upd</span><span class="o">.</span><span class="n">rows_inserted</span> <span class="o">==</span> <span class="mi">1</span> |
| </code></pre></div> |
| <p>PyIceberg will automatically detect which rows need to be updated, inserted or can simply be ignored.</p> |
| <h2 id="inspecting-tables">Inspecting tables<a class="headerlink" href="#inspecting-tables" title="Permanent link">¶</a></h2> |
| <p>To explore the table metadata, tables can be inspected.</p> |
| <!-- prettier-ignore-start --> |
| |
| <div class="admonition tip"> |
| <p class="admonition-title">Time Travel</p> |
| <p>To inspect a table's metadata with the time travel feature, call the inspect table method with the <code>snapshot_id</code> argument. |
| Time travel is supported on all metadata tables except <code>snapshots</code> and <code>refs</code>.</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-37-1" name="__codelineno-37-1" href="#__codelineno-37-1"></a><span class="n">table</span><span class="o">.</span><span class="n">inspect</span><span class="o">.</span><span class="n">entries</span><span class="p">(</span><span class="n">snapshot_id</span><span class="o">=</span><span class="mi">805611270568163028</span><span class="p">)</span> |
| </code></pre></div> |
| </div> |
| <!-- prettier-ignore-end --> |
| |
| <h3 id="snapshots">Snapshots<a class="headerlink" href="#snapshots" title="Permanent link">¶</a></h3> |
| <p>Inspect the snapshots of the table:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-38-1" name="__codelineno-38-1" href="#__codelineno-38-1"></a><span class="n">table</span><span class="o">.</span><span class="n">inspect</span><span class="o">.</span><span class="n">snapshots</span><span class="p">()</span> |
| </code></pre></div> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-39-1" name="__codelineno-39-1" href="#__codelineno-39-1"></a><span class="n">pyarrow</span><span class="o">.</span><span class="n">Table</span> |
| <a id="__codelineno-39-2" name="__codelineno-39-2" href="#__codelineno-39-2"></a><span class="n">committed_at</span><span class="p">:</span> <span class="n">timestamp</span><span class="p">[</span><span class="n">ms</span><span class="p">]</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-39-3" name="__codelineno-39-3" href="#__codelineno-39-3"></a><span class="n">snapshot_id</span><span class="p">:</span> <span class="n">int64</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-39-4" name="__codelineno-39-4" href="#__codelineno-39-4"></a><span class="n">parent_id</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-39-5" name="__codelineno-39-5" href="#__codelineno-39-5"></a><span class="n">operation</span><span class="p">:</span> <span class="n">string</span> |
| <a id="__codelineno-39-6" name="__codelineno-39-6" href="#__codelineno-39-6"></a><span class="n">manifest_list</span><span class="p">:</span> <span class="n">string</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-39-7" name="__codelineno-39-7" href="#__codelineno-39-7"></a><span class="n">summary</span><span class="p">:</span> <span class="nb">map</span><span class="o"><</span><span class="n">string</span><span class="p">,</span> <span class="n">string</span><span class="o">></span> |
| <a id="__codelineno-39-8" name="__codelineno-39-8" href="#__codelineno-39-8"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">entries</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">key</span><span class="p">:</span> <span class="n">string</span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">string</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-39-9" name="__codelineno-39-9" href="#__codelineno-39-9"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">string</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-39-10" name="__codelineno-39-10" href="#__codelineno-39-10"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">string</span> |
| <a id="__codelineno-39-11" name="__codelineno-39-11" href="#__codelineno-39-11"></a><span class="o">----</span> |
| <a id="__codelineno-39-12" name="__codelineno-39-12" href="#__codelineno-39-12"></a><span class="n">committed_at</span><span class="p">:</span> <span class="p">[[</span><span class="mi">2024</span><span class="o">-</span><span class="mi">03</span><span class="o">-</span><span class="mi">15</span> <span class="mi">15</span><span class="p">:</span><span class="mi">01</span><span class="p">:</span><span class="mf">25.682</span><span class="p">,</span><span class="mi">2024</span><span class="o">-</span><span class="mi">03</span><span class="o">-</span><span class="mi">15</span> <span class="mi">15</span><span class="p">:</span><span class="mi">01</span><span class="p">:</span><span class="mf">25.730</span><span class="p">,</span><span class="mi">2024</span><span class="o">-</span><span class="mi">03</span><span class="o">-</span><span class="mi">15</span> <span class="mi">15</span><span class="p">:</span><span class="mi">01</span><span class="p">:</span><span class="mf">25.772</span><span class="p">]]</span> |
| <a id="__codelineno-39-13" name="__codelineno-39-13" href="#__codelineno-39-13"></a><span class="n">snapshot_id</span><span class="p">:</span> <span class="p">[[</span><span class="mi">805611270568163028</span><span class="p">,</span><span class="mi">3679426539959220963</span><span class="p">,</span><span class="mi">5588071473139865870</span><span class="p">]]</span> |
| <a id="__codelineno-39-14" name="__codelineno-39-14" href="#__codelineno-39-14"></a><span class="n">parent_id</span><span class="p">:</span> <span class="p">[[</span><span class="n">null</span><span class="p">,</span><span class="mi">805611270568163028</span><span class="p">,</span><span class="mi">3679426539959220963</span><span class="p">]]</span> |
| <a id="__codelineno-39-15" name="__codelineno-39-15" href="#__codelineno-39-15"></a><span class="n">operation</span><span class="p">:</span> <span class="p">[[</span><span class="s2">"append"</span><span class="p">,</span><span class="s2">"overwrite"</span><span class="p">,</span><span class="s2">"append"</span><span class="p">]]</span> |
| <a id="__codelineno-39-16" name="__codelineno-39-16" href="#__codelineno-39-16"></a><span class="n">manifest_list</span><span class="p">:</span> <span class="p">[[</span><span class="s2">"s3://warehouse/default/table_metadata_snapshots/metadata/snap-805611270568163028-0-43637daf-ea4b-4ceb-b096-a60c25481eb5.avro"</span><span class="p">,</span><span class="s2">"s3://warehouse/default/table_metadata_snapshots/metadata/snap-3679426539959220963-0-8be81019-adf1-4bb6-a127-e15217bd50b3.avro"</span><span class="p">,</span><span class="s2">"s3://warehouse/default/table_metadata_snapshots/metadata/snap-5588071473139865870-0-1382dd7e-5fbc-4c51-9776-a832d7d0984e.avro"</span><span class="p">]]</span> |
| <a id="__codelineno-39-17" name="__codelineno-39-17" href="#__codelineno-39-17"></a><span class="n">summary</span><span class="p">:</span> <span class="p">[[</span><span class="n">keys</span><span class="p">:[</span><span class="s2">"added-files-size"</span><span class="p">,</span><span class="s2">"added-data-files"</span><span class="p">,</span><span class="s2">"added-records"</span><span class="p">,</span><span class="s2">"total-data-files"</span><span class="p">,</span><span class="s2">"total-delete-files"</span><span class="p">,</span><span class="s2">"total-records"</span><span class="p">,</span><span class="s2">"total-files-size"</span><span class="p">,</span><span class="s2">"total-position-deletes"</span><span class="p">,</span><span class="s2">"total-equality-deletes"</span><span class="p">]</span><span class="n">values</span><span class="p">:[</span><span class="s2">"5459"</span><span class="p">,</span><span class="s2">"1"</span><span class="p">,</span><span class="s2">"3"</span><span class="p">,</span><span class="s2">"1"</span><span class="p">,</span><span class="s2">"0"</span><span class="p">,</span><span class="s2">"3"</span><span class="p">,</span><span class="s2">"5459"</span><span class="p">,</span><span class="s2">"0"</span><span class="p">,</span><span class="s2">"0"</span><span class="p">],</span><span class="n">keys</span><span class="p">:[</span><span class="s2">"added-files-size"</span><span class="p">,</span><span class="s2">"added-data-files"</span><span class="p">,</span><span class="s2">"added-records"</span><span class="p">,</span><span class="s2">"total-data-files"</span><span class="p">,</span><span class="s2">"total-records"</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="s2">"total-equality-deletes"</span><span class="p">,</span><span class="s2">"total-files-size"</span><span class="p">,</span><span class="s2">"deleted-data-files"</span><span class="p">,</span><span 
class="s2">"deleted-records"</span><span class="p">,</span><span class="s2">"removed-files-size"</span><span class="p">]</span><span class="n">values</span><span class="p">:[</span><span class="s2">"5459"</span><span class="p">,</span><span class="s2">"1"</span><span class="p">,</span><span class="s2">"3"</span><span class="p">,</span><span class="s2">"1"</span><span class="p">,</span><span class="s2">"3"</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="s2">"0"</span><span class="p">,</span><span class="s2">"5459"</span><span class="p">,</span><span class="s2">"1"</span><span class="p">,</span><span class="s2">"3"</span><span class="p">,</span><span class="s2">"5459"</span><span class="p">],</span><span class="n">keys</span><span class="p">:[</span><span class="s2">"added-files-size"</span><span class="p">,</span><span class="s2">"added-data-files"</span><span class="p">,</span><span class="s2">"added-records"</span><span class="p">,</span><span class="s2">"total-data-files"</span><span class="p">,</span><span class="s2">"total-delete-files"</span><span class="p">,</span><span class="s2">"total-records"</span><span class="p">,</span><span class="s2">"total-files-size"</span><span class="p">,</span><span class="s2">"total-position-deletes"</span><span class="p">,</span><span class="s2">"total-equality-deletes"</span><span class="p">]</span><span class="n">values</span><span class="p">:[</span><span class="s2">"5459"</span><span class="p">,</span><span class="s2">"1"</span><span class="p">,</span><span class="s2">"3"</span><span class="p">,</span><span class="s2">"2"</span><span class="p">,</span><span class="s2">"0"</span><span class="p">,</span><span class="s2">"6"</span><span class="p">,</span><span class="s2">"10918"</span><span class="p">,</span><span class="s2">"0"</span><span class="p">,</span><span class="s2">"0"</span><span class="p">]]]</span> |
| </code></pre></div> |
| <h3 id="partitions">Partitions<a class="headerlink" href="#partitions" title="Permanent link">¶</a></h3> |
| <p>Inspect the partitions of the table:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-40-1" name="__codelineno-40-1" href="#__codelineno-40-1"></a><span class="n">table</span><span class="o">.</span><span class="n">inspect</span><span class="o">.</span><span class="n">partitions</span><span class="p">()</span> |
| </code></pre></div> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-41-1" name="__codelineno-41-1" href="#__codelineno-41-1"></a><span class="n">pyarrow</span><span class="o">.</span><span class="n">Table</span> |
| <a id="__codelineno-41-2" name="__codelineno-41-2" href="#__codelineno-41-2"></a><span class="n">partition</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">dt_month</span><span class="p">:</span> <span class="n">int32</span><span class="p">,</span> <span class="n">dt_day</span><span class="p">:</span> <span class="n">date32</span><span class="p">[</span><span class="n">day</span><span class="p">]</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-41-3" name="__codelineno-41-3" href="#__codelineno-41-3"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">dt_month</span><span class="p">:</span> <span class="n">int32</span> |
| <a id="__codelineno-41-4" name="__codelineno-41-4" href="#__codelineno-41-4"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">dt_day</span><span class="p">:</span> <span class="n">date32</span><span class="p">[</span><span class="n">day</span><span class="p">]</span> |
| <a id="__codelineno-41-5" name="__codelineno-41-5" href="#__codelineno-41-5"></a><span class="n">spec_id</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-41-6" name="__codelineno-41-6" href="#__codelineno-41-6"></a><span class="n">record_count</span><span class="p">:</span> <span class="n">int64</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-41-7" name="__codelineno-41-7" href="#__codelineno-41-7"></a><span class="n">file_count</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-41-8" name="__codelineno-41-8" href="#__codelineno-41-8"></a><span class="n">total_data_file_size_in_bytes</span><span class="p">:</span> <span class="n">int64</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-41-9" name="__codelineno-41-9" href="#__codelineno-41-9"></a><span class="n">position_delete_record_count</span><span class="p">:</span> <span class="n">int64</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-41-10" name="__codelineno-41-10" href="#__codelineno-41-10"></a><span class="n">position_delete_file_count</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-41-11" name="__codelineno-41-11" href="#__codelineno-41-11"></a><span class="n">equality_delete_record_count</span><span class="p">:</span> <span class="n">int64</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-41-12" name="__codelineno-41-12" href="#__codelineno-41-12"></a><span class="n">equality_delete_file_count</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-41-13" name="__codelineno-41-13" href="#__codelineno-41-13"></a><span class="n">last_updated_at</span><span class="p">:</span> <span class="n">timestamp</span><span class="p">[</span><span class="n">ms</span><span class="p">]</span> |
| <a id="__codelineno-41-14" name="__codelineno-41-14" href="#__codelineno-41-14"></a><span class="n">last_updated_snapshot_id</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-41-15" name="__codelineno-41-15" href="#__codelineno-41-15"></a><span class="o">----</span> |
| <a id="__codelineno-41-16" name="__codelineno-41-16" href="#__codelineno-41-16"></a><span class="n">partition</span><span class="p">:</span> <span class="p">[</span> |
| <a id="__codelineno-41-17" name="__codelineno-41-17" href="#__codelineno-41-17"></a> <span class="o">--</span> <span class="n">is_valid</span><span class="p">:</span> <span class="nb">all</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-41-18" name="__codelineno-41-18" href="#__codelineno-41-18"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">0</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int32</span> |
| <a id="__codelineno-41-19" name="__codelineno-41-19" href="#__codelineno-41-19"></a><span class="p">[</span><span class="n">null</span><span class="p">,</span><span class="n">null</span><span class="p">,</span><span class="mi">612</span><span class="p">]</span> |
| <a id="__codelineno-41-20" name="__codelineno-41-20" href="#__codelineno-41-20"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">1</span> <span class="nb">type</span><span class="p">:</span> <span class="n">date32</span><span class="p">[</span><span class="n">day</span><span class="p">]</span> |
| <a id="__codelineno-41-21" name="__codelineno-41-21" href="#__codelineno-41-21"></a><span class="p">[</span><span class="n">null</span><span class="p">,</span><span class="mi">2021</span><span class="o">-</span><span class="mi">02</span><span class="o">-</span><span class="mi">01</span><span class="p">,</span><span class="n">null</span><span class="p">]]</span> |
| <a id="__codelineno-41-22" name="__codelineno-41-22" href="#__codelineno-41-22"></a><span class="n">spec_id</span><span class="p">:</span> <span class="p">[[</span><span class="mi">2</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">0</span><span class="p">]]</span> |
| <a id="__codelineno-41-23" name="__codelineno-41-23" href="#__codelineno-41-23"></a><span class="n">record_count</span><span class="p">:</span> <span class="p">[[</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">]]</span> |
| <a id="__codelineno-41-24" name="__codelineno-41-24" href="#__codelineno-41-24"></a><span class="n">file_count</span><span class="p">:</span> <span class="p">[[</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">]]</span> |
| <a id="__codelineno-41-25" name="__codelineno-41-25" href="#__codelineno-41-25"></a><span class="n">total_data_file_size_in_bytes</span><span class="p">:</span> <span class="p">[[</span><span class="mi">641</span><span class="p">,</span><span class="mi">641</span><span class="p">,</span><span class="mi">1260</span><span class="p">]]</span> |
| <a id="__codelineno-41-26" name="__codelineno-41-26" href="#__codelineno-41-26"></a><span class="n">position_delete_record_count</span><span class="p">:</span> <span class="p">[[</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">]]</span> |
| <a id="__codelineno-41-27" name="__codelineno-41-27" href="#__codelineno-41-27"></a><span class="n">position_delete_file_count</span><span class="p">:</span> <span class="p">[[</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">]]</span> |
| <a id="__codelineno-41-28" name="__codelineno-41-28" href="#__codelineno-41-28"></a><span class="n">equality_delete_record_count</span><span class="p">:</span> <span class="p">[[</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">]]</span> |
| <a id="__codelineno-41-29" name="__codelineno-41-29" href="#__codelineno-41-29"></a><span class="n">equality_delete_file_count</span><span class="p">:</span> <span class="p">[[</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">]]</span> |
| <a id="__codelineno-41-30" name="__codelineno-41-30" href="#__codelineno-41-30"></a><span class="n">last_updated_at</span><span class="p">:</span> <span class="p">[[</span><span class="mi">2024</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">13</span> <span class="mi">18</span><span class="p">:</span><span class="mi">59</span><span class="p">:</span><span class="mf">35.981</span><span class="p">,</span><span class="mi">2024</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">13</span> <span class="mi">18</span><span class="p">:</span><span class="mi">59</span><span class="p">:</span><span class="mf">35.465</span><span class="p">,</span><span class="mi">2024</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">13</span> <span class="mi">18</span><span class="p">:</span><span class="mi">59</span><span class="p">:</span><span class="mf">35.003</span><span class="p">]]</span> |
| </code></pre></div> |
| <h3 id="entries">Entries<a class="headerlink" href="#entries" title="Permanent link">¶</a></h3> |
| <p>To show all of the table's current manifest entries, for both data and delete files:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-42-1" name="__codelineno-42-1" href="#__codelineno-42-1"></a><span class="n">table</span><span class="o">.</span><span class="n">inspect</span><span class="o">.</span><span class="n">entries</span><span class="p">()</span> |
| </code></pre></div> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-43-1" name="__codelineno-43-1" href="#__codelineno-43-1"></a><span class="n">pyarrow</span><span class="o">.</span><span class="n">Table</span> |
| <a id="__codelineno-43-2" name="__codelineno-43-2" href="#__codelineno-43-2"></a><span class="n">status</span><span class="p">:</span> <span class="n">int8</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-3" name="__codelineno-43-3" href="#__codelineno-43-3"></a><span class="n">snapshot_id</span><span class="p">:</span> <span class="n">int64</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-4" name="__codelineno-43-4" href="#__codelineno-43-4"></a><span class="n">sequence_number</span><span class="p">:</span> <span class="n">int64</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-5" name="__codelineno-43-5" href="#__codelineno-43-5"></a><span class="n">file_sequence_number</span><span class="p">:</span> <span class="n">int64</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-6" name="__codelineno-43-6" href="#__codelineno-43-6"></a><span class="n">data_file</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">content</span><span class="p">:</span> <span class="n">int8</span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">file_path</span><span class="p">:</span> <span class="n">string</span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">file_format</span><span class="p">:</span> <span class="n">string</span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">partition</span><span class="p">:</span> <span class="n">struct</span><span class="o"><></span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">record_count</span><span class="p">:</span> <span class="n">int64</span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">file_size_in_bytes</span><span class="p">:</span> <span class="n">int64</span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">column_sizes</span><span class="p">:</span> <span class="nb">map</span><span class="o"><</span><span class="n">int32</span><span class="p">,</span> <span class="n">int64</span><span class="o">></span><span class="p">,</span> <span class="n">value_counts</span><span class="p">:</span> <span class="nb">map</span><span class="o"><</span><span class="n">int32</span><span class="p">,</span> <span class="n">int64</span><span class="o">></span><span class="p">,</span> <span class="n">null_value_counts</span><span class="p">:</span> <span class="nb">map</span><span class="o"><</span><span class="n">int32</span><span class="p">,</span> <span class="n">int64</span><span class="o">></span><span class="p">,</span> <span 
class="n">nan_value_counts</span><span class="p">:</span> <span class="nb">map</span><span class="o"><</span><span class="n">int32</span><span class="p">,</span> <span class="n">int64</span><span class="o">></span><span class="p">,</span> <span class="n">lower_bounds</span><span class="p">:</span> <span class="nb">map</span><span class="o"><</span><span class="n">int32</span><span class="p">,</span> <span class="n">binary</span><span class="o">></span><span class="p">,</span> <span class="n">upper_bounds</span><span class="p">:</span> <span class="nb">map</span><span class="o"><</span><span class="n">int32</span><span class="p">,</span> <span class="n">binary</span><span class="o">></span><span class="p">,</span> <span class="n">key_metadata</span><span class="p">:</span> <span class="n">binary</span><span class="p">,</span> <span class="n">split_offsets</span><span class="p">:</span> <span class="nb">list</span><span class="o"><</span><span class="n">item</span><span class="p">:</span> <span class="n">int64</span><span class="o">></span><span class="p">,</span> <span class="n">equality_ids</span><span class="p">:</span> <span class="nb">list</span><span class="o"><</span><span class="n">item</span><span class="p">:</span> <span class="n">int32</span><span class="o">></span><span class="p">,</span> <span class="n">sort_order_id</span><span class="p">:</span> <span class="n">int32</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-7" name="__codelineno-43-7" href="#__codelineno-43-7"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">content</span><span class="p">:</span> <span class="n">int8</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-8" name="__codelineno-43-8" href="#__codelineno-43-8"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">file_path</span><span class="p">:</span> <span class="n">string</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-9" name="__codelineno-43-9" href="#__codelineno-43-9"></a> <span class="n">child</span> <span class="mi">2</span><span class="p">,</span> <span class="n">file_format</span><span class="p">:</span> <span class="n">string</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-10" name="__codelineno-43-10" href="#__codelineno-43-10"></a> <span class="n">child</span> <span class="mi">3</span><span class="p">,</span> <span class="n">partition</span><span class="p">:</span> <span class="n">struct</span><span class="o"><></span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-11" name="__codelineno-43-11" href="#__codelineno-43-11"></a> <span class="n">child</span> <span class="mi">4</span><span class="p">,</span> <span class="n">record_count</span><span class="p">:</span> <span class="n">int64</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-12" name="__codelineno-43-12" href="#__codelineno-43-12"></a> <span class="n">child</span> <span class="mi">5</span><span class="p">,</span> <span class="n">file_size_in_bytes</span><span class="p">:</span> <span class="n">int64</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-13" name="__codelineno-43-13" href="#__codelineno-43-13"></a> <span class="n">child</span> <span class="mi">6</span><span class="p">,</span> <span class="n">column_sizes</span><span class="p">:</span> <span class="nb">map</span><span class="o"><</span><span class="n">int32</span><span class="p">,</span> <span class="n">int64</span><span class="o">></span> |
| <a id="__codelineno-43-14" name="__codelineno-43-14" href="#__codelineno-43-14"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">entries</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">int64</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-15" name="__codelineno-43-15" href="#__codelineno-43-15"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-16" name="__codelineno-43-16" href="#__codelineno-43-16"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-17" name="__codelineno-43-17" href="#__codelineno-43-17"></a> <span class="n">child</span> <span class="mi">7</span><span class="p">,</span> <span class="n">value_counts</span><span class="p">:</span> <span class="nb">map</span><span class="o"><</span><span class="n">int32</span><span class="p">,</span> <span class="n">int64</span><span class="o">></span> |
| <a id="__codelineno-43-18" name="__codelineno-43-18" href="#__codelineno-43-18"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">entries</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">int64</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-19" name="__codelineno-43-19" href="#__codelineno-43-19"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-20" name="__codelineno-43-20" href="#__codelineno-43-20"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-21" name="__codelineno-43-21" href="#__codelineno-43-21"></a> <span class="n">child</span> <span class="mi">8</span><span class="p">,</span> <span class="n">null_value_counts</span><span class="p">:</span> <span class="nb">map</span><span class="o"><</span><span class="n">int32</span><span class="p">,</span> <span class="n">int64</span><span class="o">></span> |
| <a id="__codelineno-43-22" name="__codelineno-43-22" href="#__codelineno-43-22"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">entries</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">int64</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-23" name="__codelineno-43-23" href="#__codelineno-43-23"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-24" name="__codelineno-43-24" href="#__codelineno-43-24"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-25" name="__codelineno-43-25" href="#__codelineno-43-25"></a> <span class="n">child</span> <span class="mi">9</span><span class="p">,</span> <span class="n">nan_value_counts</span><span class="p">:</span> <span class="nb">map</span><span class="o"><</span><span class="n">int32</span><span class="p">,</span> <span class="n">int64</span><span class="o">></span> |
| <a id="__codelineno-43-26" name="__codelineno-43-26" href="#__codelineno-43-26"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">entries</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">int64</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-27" name="__codelineno-43-27" href="#__codelineno-43-27"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-28" name="__codelineno-43-28" href="#__codelineno-43-28"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-29" name="__codelineno-43-29" href="#__codelineno-43-29"></a> <span class="n">child</span> <span class="mi">10</span><span class="p">,</span> <span class="n">lower_bounds</span><span class="p">:</span> <span class="nb">map</span><span class="o"><</span><span class="n">int32</span><span class="p">,</span> <span class="n">binary</span><span class="o">></span> |
| <a id="__codelineno-43-30" name="__codelineno-43-30" href="#__codelineno-43-30"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">entries</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">binary</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-31" name="__codelineno-43-31" href="#__codelineno-43-31"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-32" name="__codelineno-43-32" href="#__codelineno-43-32"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">binary</span> |
| <a id="__codelineno-43-33" name="__codelineno-43-33" href="#__codelineno-43-33"></a> <span class="n">child</span> <span class="mi">11</span><span class="p">,</span> <span class="n">upper_bounds</span><span class="p">:</span> <span class="nb">map</span><span class="o"><</span><span class="n">int32</span><span class="p">,</span> <span class="n">binary</span><span class="o">></span> |
| <a id="__codelineno-43-34" name="__codelineno-43-34" href="#__codelineno-43-34"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">entries</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">binary</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-35" name="__codelineno-43-35" href="#__codelineno-43-35"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-36" name="__codelineno-43-36" href="#__codelineno-43-36"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">binary</span> |
| <a id="__codelineno-43-37" name="__codelineno-43-37" href="#__codelineno-43-37"></a> <span class="n">child</span> <span class="mi">12</span><span class="p">,</span> <span class="n">key_metadata</span><span class="p">:</span> <span class="n">binary</span> |
| <a id="__codelineno-43-38" name="__codelineno-43-38" href="#__codelineno-43-38"></a> <span class="n">child</span> <span class="mi">13</span><span class="p">,</span> <span class="n">split_offsets</span><span class="p">:</span> <span class="nb">list</span><span class="o"><</span><span class="n">item</span><span class="p">:</span> <span class="n">int64</span><span class="o">></span> |
| <a id="__codelineno-43-39" name="__codelineno-43-39" href="#__codelineno-43-39"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-40" name="__codelineno-43-40" href="#__codelineno-43-40"></a> <span class="n">child</span> <span class="mi">14</span><span class="p">,</span> <span class="n">equality_ids</span><span class="p">:</span> <span class="nb">list</span><span class="o"><</span><span class="n">item</span><span class="p">:</span> <span class="n">int32</span><span class="o">></span> |
| <a id="__codelineno-43-41" name="__codelineno-43-41" href="#__codelineno-43-41"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="n">int32</span> |
| <a id="__codelineno-43-42" name="__codelineno-43-42" href="#__codelineno-43-42"></a> <span class="n">child</span> <span class="mi">15</span><span class="p">,</span> <span class="n">sort_order_id</span><span class="p">:</span> <span class="n">int32</span> |
| <a id="__codelineno-43-43" name="__codelineno-43-43" href="#__codelineno-43-43"></a><span class="n">readable_metrics</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">city</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">string</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">string</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">lat</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">double</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">double</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">long</span><span class="p">:</span> <span 
class="n">struct</span><span class="o"><</span><span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">double</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">double</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span><span class="o">></span> |
| <a id="__codelineno-43-44" name="__codelineno-43-44" href="#__codelineno-43-44"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">city</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">string</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">string</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-45" name="__codelineno-43-45" href="#__codelineno-43-45"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-46" name="__codelineno-43-46" href="#__codelineno-43-46"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-47" name="__codelineno-43-47" href="#__codelineno-43-47"></a> <span class="n">child</span> <span class="mi">2</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-48" name="__codelineno-43-48" href="#__codelineno-43-48"></a> <span class="n">child</span> <span class="mi">3</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-49" name="__codelineno-43-49" href="#__codelineno-43-49"></a> <span class="n">child</span> <span class="mi">4</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">string</span> |
| <a id="__codelineno-43-50" name="__codelineno-43-50" href="#__codelineno-43-50"></a> <span class="n">child</span> <span class="mi">5</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">string</span> |
| <a id="__codelineno-43-51" name="__codelineno-43-51" href="#__codelineno-43-51"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">lat</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">double</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">double</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-52" name="__codelineno-43-52" href="#__codelineno-43-52"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-53" name="__codelineno-43-53" href="#__codelineno-43-53"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-54" name="__codelineno-43-54" href="#__codelineno-43-54"></a> <span class="n">child</span> <span class="mi">2</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-55" name="__codelineno-43-55" href="#__codelineno-43-55"></a> <span class="n">child</span> <span class="mi">3</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-56" name="__codelineno-43-56" href="#__codelineno-43-56"></a> <span class="n">child</span> <span class="mi">4</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-43-57" name="__codelineno-43-57" href="#__codelineno-43-57"></a> <span class="n">child</span> <span class="mi">5</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-43-58" name="__codelineno-43-58" href="#__codelineno-43-58"></a> <span class="n">child</span> <span class="mi">2</span><span class="p">,</span> <span class="n">long</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">double</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">double</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-59" name="__codelineno-43-59" href="#__codelineno-43-59"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-60" name="__codelineno-43-60" href="#__codelineno-43-60"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-61" name="__codelineno-43-61" href="#__codelineno-43-61"></a> <span class="n">child</span> <span class="mi">2</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-62" name="__codelineno-43-62" href="#__codelineno-43-62"></a> <span class="n">child</span> <span class="mi">3</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-63" name="__codelineno-43-63" href="#__codelineno-43-63"></a> <span class="n">child</span> <span class="mi">4</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-43-64" name="__codelineno-43-64" href="#__codelineno-43-64"></a> <span class="n">child</span> <span class="mi">5</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-43-65" name="__codelineno-43-65" href="#__codelineno-43-65"></a><span class="o">----</span> |
| <a id="__codelineno-43-66" name="__codelineno-43-66" href="#__codelineno-43-66"></a><span class="n">status</span><span class="p">:</span> <span class="p">[[</span><span class="mi">1</span><span class="p">]]</span> |
| <a id="__codelineno-43-67" name="__codelineno-43-67" href="#__codelineno-43-67"></a><span class="n">snapshot_id</span><span class="p">:</span> <span class="p">[[</span><span class="mi">6245626162224016531</span><span class="p">]]</span> |
| <a id="__codelineno-43-68" name="__codelineno-43-68" href="#__codelineno-43-68"></a><span class="n">sequence_number</span><span class="p">:</span> <span class="p">[[</span><span class="mi">1</span><span class="p">]]</span> |
| <a id="__codelineno-43-69" name="__codelineno-43-69" href="#__codelineno-43-69"></a><span class="n">file_sequence_number</span><span class="p">:</span> <span class="p">[[</span><span class="mi">1</span><span class="p">]]</span> |
| <a id="__codelineno-43-70" name="__codelineno-43-70" href="#__codelineno-43-70"></a><span class="n">data_file</span><span class="p">:</span> <span class="p">[</span> |
| <a id="__codelineno-43-71" name="__codelineno-43-71" href="#__codelineno-43-71"></a> <span class="o">--</span> <span class="n">is_valid</span><span class="p">:</span> <span class="nb">all</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-72" name="__codelineno-43-72" href="#__codelineno-43-72"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">0</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int8</span> |
| <a id="__codelineno-43-73" name="__codelineno-43-73" href="#__codelineno-43-73"></a><span class="p">[</span><span class="mi">0</span><span class="p">]</span> |
| <a id="__codelineno-43-74" name="__codelineno-43-74" href="#__codelineno-43-74"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">1</span> <span class="nb">type</span><span class="p">:</span> <span class="n">string</span> |
| <a id="__codelineno-43-75" name="__codelineno-43-75" href="#__codelineno-43-75"></a><span class="p">[</span><span class="s2">"s3://warehouse/default/cities/data/00000-0-80766b66-e558-4150-a5cf-85e4c609b9fe.parquet"</span><span class="p">]</span> |
| <a id="__codelineno-43-76" name="__codelineno-43-76" href="#__codelineno-43-76"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">2</span> <span class="nb">type</span><span class="p">:</span> <span class="n">string</span> |
| <a id="__codelineno-43-77" name="__codelineno-43-77" href="#__codelineno-43-77"></a><span class="p">[</span><span class="s2">"PARQUET"</span><span class="p">]</span> |
| <a id="__codelineno-43-78" name="__codelineno-43-78" href="#__codelineno-43-78"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">3</span> <span class="nb">type</span><span class="p">:</span> <span class="n">struct</span><span class="o">&lt;&gt;</span> |
| <a id="__codelineno-43-79" name="__codelineno-43-79" href="#__codelineno-43-79"></a> <span class="o">--</span> <span class="n">is_valid</span><span class="p">:</span> <span class="nb">all</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-80" name="__codelineno-43-80" href="#__codelineno-43-80"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">4</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-81" name="__codelineno-43-81" href="#__codelineno-43-81"></a><span class="p">[</span><span class="mi">4</span><span class="p">]</span> |
| <a id="__codelineno-43-82" name="__codelineno-43-82" href="#__codelineno-43-82"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">5</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-83" name="__codelineno-43-83" href="#__codelineno-43-83"></a><span class="p">[</span><span class="mi">1656</span><span class="p">]</span> |
| <a id="__codelineno-43-84" name="__codelineno-43-84" href="#__codelineno-43-84"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">6</span> <span class="nb">type</span><span class="p">:</span> <span class="nb">map</span><span class="o">&lt;</span><span class="n">int32</span><span class="p">,</span> <span class="n">int64</span><span class="o">&gt;</span> |
| <a id="__codelineno-43-85" name="__codelineno-43-85" href="#__codelineno-43-85"></a><span class="p">[</span><span class="n">keys</span><span class="p">:[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">]</span><span class="n">values</span><span class="p">:[</span><span class="mi">140</span><span class="p">,</span><span class="mi">135</span><span class="p">,</span><span class="mi">135</span><span class="p">]]</span> |
| <a id="__codelineno-43-86" name="__codelineno-43-86" href="#__codelineno-43-86"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">7</span> <span class="nb">type</span><span class="p">:</span> <span class="nb">map</span><span class="o">&lt;</span><span class="n">int32</span><span class="p">,</span> <span class="n">int64</span><span class="o">&gt;</span> |
| <a id="__codelineno-43-87" name="__codelineno-43-87" href="#__codelineno-43-87"></a><span class="p">[</span><span class="n">keys</span><span class="p">:[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">]</span><span class="n">values</span><span class="p">:[</span><span class="mi">4</span><span class="p">,</span><span class="mi">4</span><span class="p">,</span><span class="mi">4</span><span class="p">]]</span> |
| <a id="__codelineno-43-88" name="__codelineno-43-88" href="#__codelineno-43-88"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">8</span> <span class="nb">type</span><span class="p">:</span> <span class="nb">map</span><span class="o">&lt;</span><span class="n">int32</span><span class="p">,</span> <span class="n">int64</span><span class="o">&gt;</span> |
| <a id="__codelineno-43-89" name="__codelineno-43-89" href="#__codelineno-43-89"></a><span class="p">[</span><span class="n">keys</span><span class="p">:[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">]</span><span class="n">values</span><span class="p">:[</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">]]</span> |
| <a id="__codelineno-43-90" name="__codelineno-43-90" href="#__codelineno-43-90"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">9</span> <span class="nb">type</span><span class="p">:</span> <span class="nb">map</span><span class="o">&lt;</span><span class="n">int32</span><span class="p">,</span> <span class="n">int64</span><span class="o">&gt;</span> |
| <a id="__codelineno-43-91" name="__codelineno-43-91" href="#__codelineno-43-91"></a><span class="p">[</span><span class="n">keys</span><span class="p">:[]</span><span class="n">values</span><span class="p">:[]]</span> |
| <a id="__codelineno-43-92" name="__codelineno-43-92" href="#__codelineno-43-92"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">10</span> <span class="nb">type</span><span class="p">:</span> <span class="nb">map</span><span class="o">&lt;</span><span class="n">int32</span><span class="p">,</span> <span class="n">binary</span><span class="o">&gt;</span> |
| <a id="__codelineno-43-93" name="__codelineno-43-93" href="#__codelineno-43-93"></a><span class="p">[</span><span class="n">keys</span><span class="p">:[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">]</span><span class="n">values</span><span class="p">:[</span><span class="mi">416</span><span class="n">D7374657264616D</span><span class="p">,</span><span class="mi">8602</span><span class="n">B68311E34240</span><span class="p">,</span><span class="mi">3</span><span class="n">A77BB5E9A9B5EC0</span><span class="p">]]</span> |
| <a id="__codelineno-43-94" name="__codelineno-43-94" href="#__codelineno-43-94"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">11</span> <span class="nb">type</span><span class="p">:</span> <span class="nb">map</span><span class="o">&lt;</span><span class="n">int32</span><span class="p">,</span> <span class="n">binary</span><span class="o">&gt;</span> |
| <a id="__codelineno-43-95" name="__codelineno-43-95" href="#__codelineno-43-95"></a><span class="p">[</span><span class="n">keys</span><span class="p">:[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">]</span><span class="n">values</span><span class="p">:[</span><span class="mf">53616E204672616</span><span class="n">E636973636F</span><span class="p">,</span><span class="n">F5BEF1B5678E4A40</span><span class="p">,</span><span class="mi">304</span><span class="n">CA60A46651840</span><span class="p">]]</span> |
| <a id="__codelineno-43-96" name="__codelineno-43-96" href="#__codelineno-43-96"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">12</span> <span class="nb">type</span><span class="p">:</span> <span class="n">binary</span> |
| <a id="__codelineno-43-97" name="__codelineno-43-97" href="#__codelineno-43-97"></a><span class="p">[</span><span class="n">null</span><span class="p">]</span> |
| <a id="__codelineno-43-98" name="__codelineno-43-98" href="#__codelineno-43-98"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">13</span> <span class="nb">type</span><span class="p">:</span> <span class="nb">list</span><span class="o">&lt;</span><span class="n">item</span><span class="p">:</span> <span class="n">int64</span><span class="o">&gt;</span> |
| <a id="__codelineno-43-99" name="__codelineno-43-99" href="#__codelineno-43-99"></a><span class="p">[[</span><span class="mi">4</span><span class="p">]]</span> |
| <a id="__codelineno-43-100" name="__codelineno-43-100" href="#__codelineno-43-100"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">14</span> <span class="nb">type</span><span class="p">:</span> <span class="nb">list</span><span class="o">&lt;</span><span class="n">item</span><span class="p">:</span> <span class="n">int32</span><span class="o">&gt;</span> |
| <a id="__codelineno-43-101" name="__codelineno-43-101" href="#__codelineno-43-101"></a><span class="p">[</span><span class="n">null</span><span class="p">]</span> |
| <a id="__codelineno-43-102" name="__codelineno-43-102" href="#__codelineno-43-102"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">15</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int32</span> |
| <a id="__codelineno-43-103" name="__codelineno-43-103" href="#__codelineno-43-103"></a><span class="p">[</span><span class="n">null</span><span class="p">]]</span> |
| <a id="__codelineno-43-104" name="__codelineno-43-104" href="#__codelineno-43-104"></a><span class="n">readable_metrics</span><span class="p">:</span> <span class="p">[</span> |
| <a id="__codelineno-43-105" name="__codelineno-43-105" href="#__codelineno-43-105"></a> <span class="o">--</span> <span class="n">is_valid</span><span class="p">:</span> <span class="nb">all</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-106" name="__codelineno-43-106" href="#__codelineno-43-106"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">0</span> <span class="nb">type</span><span class="p">:</span> <span class="n">struct</span><span class="o">&lt;</span><span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">string</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">string</span><span class="o">&gt;</span> |
| <a id="__codelineno-43-107" name="__codelineno-43-107" href="#__codelineno-43-107"></a> <span class="o">--</span> <span class="n">is_valid</span><span class="p">:</span> <span class="nb">all</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-108" name="__codelineno-43-108" href="#__codelineno-43-108"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">0</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-109" name="__codelineno-43-109" href="#__codelineno-43-109"></a><span class="p">[</span><span class="mi">140</span><span class="p">]</span> |
| <a id="__codelineno-43-110" name="__codelineno-43-110" href="#__codelineno-43-110"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">1</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-111" name="__codelineno-43-111" href="#__codelineno-43-111"></a><span class="p">[</span><span class="mi">4</span><span class="p">]</span> |
| <a id="__codelineno-43-112" name="__codelineno-43-112" href="#__codelineno-43-112"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">2</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-113" name="__codelineno-43-113" href="#__codelineno-43-113"></a><span class="p">[</span><span class="mi">0</span><span class="p">]</span> |
| <a id="__codelineno-43-114" name="__codelineno-43-114" href="#__codelineno-43-114"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">3</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-115" name="__codelineno-43-115" href="#__codelineno-43-115"></a><span class="p">[</span><span class="n">null</span><span class="p">]</span> |
| <a id="__codelineno-43-116" name="__codelineno-43-116" href="#__codelineno-43-116"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">4</span> <span class="nb">type</span><span class="p">:</span> <span class="n">string</span> |
| <a id="__codelineno-43-117" name="__codelineno-43-117" href="#__codelineno-43-117"></a><span class="p">[</span><span class="s2">"Amsterdam"</span><span class="p">]</span> |
| <a id="__codelineno-43-118" name="__codelineno-43-118" href="#__codelineno-43-118"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">5</span> <span class="nb">type</span><span class="p">:</span> <span class="n">string</span> |
| <a id="__codelineno-43-119" name="__codelineno-43-119" href="#__codelineno-43-119"></a><span class="p">[</span><span class="s2">"San Francisco"</span><span class="p">]</span> |
| <a id="__codelineno-43-120" name="__codelineno-43-120" href="#__codelineno-43-120"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">1</span> <span class="nb">type</span><span class="p">:</span> <span class="n">struct</span><span class="o">&lt;</span><span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">double</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">double</span><span class="o">&gt;</span> |
| <a id="__codelineno-43-121" name="__codelineno-43-121" href="#__codelineno-43-121"></a> <span class="o">--</span> <span class="n">is_valid</span><span class="p">:</span> <span class="nb">all</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-122" name="__codelineno-43-122" href="#__codelineno-43-122"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">0</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-123" name="__codelineno-43-123" href="#__codelineno-43-123"></a><span class="p">[</span><span class="mi">135</span><span class="p">]</span> |
| <a id="__codelineno-43-124" name="__codelineno-43-124" href="#__codelineno-43-124"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">1</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-125" name="__codelineno-43-125" href="#__codelineno-43-125"></a><span class="p">[</span><span class="mi">4</span><span class="p">]</span> |
| <a id="__codelineno-43-126" name="__codelineno-43-126" href="#__codelineno-43-126"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">2</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-127" name="__codelineno-43-127" href="#__codelineno-43-127"></a><span class="p">[</span><span class="mi">0</span><span class="p">]</span> |
| <a id="__codelineno-43-128" name="__codelineno-43-128" href="#__codelineno-43-128"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">3</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-129" name="__codelineno-43-129" href="#__codelineno-43-129"></a><span class="p">[</span><span class="n">null</span><span class="p">]</span> |
| <a id="__codelineno-43-130" name="__codelineno-43-130" href="#__codelineno-43-130"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">4</span> <span class="nb">type</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-43-131" name="__codelineno-43-131" href="#__codelineno-43-131"></a><span class="p">[</span><span class="mf">37.773972</span><span class="p">]</span> |
| <a id="__codelineno-43-132" name="__codelineno-43-132" href="#__codelineno-43-132"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">5</span> <span class="nb">type</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-43-133" name="__codelineno-43-133" href="#__codelineno-43-133"></a><span class="p">[</span><span class="mf">53.11254</span><span class="p">]</span> |
| <a id="__codelineno-43-134" name="__codelineno-43-134" href="#__codelineno-43-134"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">2</span> <span class="nb">type</span><span class="p">:</span> <span class="n">struct</span><span class="o">&lt;</span><span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">double</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">double</span><span class="o">&gt;</span> |
| <a id="__codelineno-43-135" name="__codelineno-43-135" href="#__codelineno-43-135"></a> <span class="o">--</span> <span class="n">is_valid</span><span class="p">:</span> <span class="nb">all</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-43-136" name="__codelineno-43-136" href="#__codelineno-43-136"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">0</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-137" name="__codelineno-43-137" href="#__codelineno-43-137"></a><span class="p">[</span><span class="mi">135</span><span class="p">]</span> |
| <a id="__codelineno-43-138" name="__codelineno-43-138" href="#__codelineno-43-138"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">1</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-139" name="__codelineno-43-139" href="#__codelineno-43-139"></a><span class="p">[</span><span class="mi">4</span><span class="p">]</span> |
| <a id="__codelineno-43-140" name="__codelineno-43-140" href="#__codelineno-43-140"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">2</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-141" name="__codelineno-43-141" href="#__codelineno-43-141"></a><span class="p">[</span><span class="mi">0</span><span class="p">]</span> |
| <a id="__codelineno-43-142" name="__codelineno-43-142" href="#__codelineno-43-142"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">3</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-43-143" name="__codelineno-43-143" href="#__codelineno-43-143"></a><span class="p">[</span><span class="n">null</span><span class="p">]</span> |
| <a id="__codelineno-43-144" name="__codelineno-43-144" href="#__codelineno-43-144"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">4</span> <span class="nb">type</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-43-145" name="__codelineno-43-145" href="#__codelineno-43-145"></a><span class="p">[</span><span class="o">-</span><span class="mf">122.431297</span><span class="p">]</span> |
| <a id="__codelineno-43-146" name="__codelineno-43-146" href="#__codelineno-43-146"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">5</span> <span class="nb">type</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-43-147" name="__codelineno-43-147" href="#__codelineno-43-147"></a><span class="p">[</span><span class="mf">6.0989</span><span class="p">]]</span> |
| </code></pre></div> |
| <h3 id="references">References<a class="headerlink" href="#references" title="Permanent link">¶</a></h3> |
| <p>To show a table's known snapshot references:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-44-1" name="__codelineno-44-1" href="#__codelineno-44-1"></a><span class="n">table</span><span class="o">.</span><span class="n">inspect</span><span class="o">.</span><span class="n">refs</span><span class="p">()</span> |
| </code></pre></div> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-45-1" name="__codelineno-45-1" href="#__codelineno-45-1"></a><span class="n">pyarrow</span><span class="o">.</span><span class="n">Table</span> |
| <a id="__codelineno-45-2" name="__codelineno-45-2" href="#__codelineno-45-2"></a><span class="n">name</span><span class="p">:</span> <span class="n">string</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-45-3" name="__codelineno-45-3" href="#__codelineno-45-3"></a><span class="nb">type</span><span class="p">:</span> <span class="n">string</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-45-4" name="__codelineno-45-4" href="#__codelineno-45-4"></a><span class="n">snapshot_id</span><span class="p">:</span> <span class="n">int64</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-45-5" name="__codelineno-45-5" href="#__codelineno-45-5"></a><span class="n">max_reference_age_in_ms</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-45-6" name="__codelineno-45-6" href="#__codelineno-45-6"></a><span class="n">min_snapshots_to_keep</span><span class="p">:</span> <span class="n">int32</span> |
| <a id="__codelineno-45-7" name="__codelineno-45-7" href="#__codelineno-45-7"></a><span class="n">max_snapshot_age_in_ms</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-45-8" name="__codelineno-45-8" href="#__codelineno-45-8"></a><span class="o">----</span> |
| <a id="__codelineno-45-9" name="__codelineno-45-9" href="#__codelineno-45-9"></a><span class="n">name</span><span class="p">:</span> <span class="p">[[</span><span class="s2">"main"</span><span class="p">,</span><span class="s2">"testTag"</span><span class="p">]]</span> |
| <a id="__codelineno-45-10" name="__codelineno-45-10" href="#__codelineno-45-10"></a><span class="nb">type</span><span class="p">:</span> <span class="p">[[</span><span class="s2">"BRANCH"</span><span class="p">,</span><span class="s2">"TAG"</span><span class="p">]]</span> |
| <a id="__codelineno-45-11" name="__codelineno-45-11" href="#__codelineno-45-11"></a><span class="n">snapshot_id</span><span class="p">:</span> <span class="p">[[</span><span class="mi">2278002651076891950</span><span class="p">,</span><span class="mi">2278002651076891950</span><span class="p">]]</span> |
| <a id="__codelineno-45-12" name="__codelineno-45-12" href="#__codelineno-45-12"></a><span class="n">max_reference_age_in_ms</span><span class="p">:</span> <span class="p">[[</span><span class="n">null</span><span class="p">,</span><span class="mi">604800000</span><span class="p">]]</span> |
| <a id="__codelineno-45-13" name="__codelineno-45-13" href="#__codelineno-45-13"></a><span class="n">min_snapshots_to_keep</span><span class="p">:</span> <span class="p">[[</span><span class="n">null</span><span class="p">,</span><span class="mi">10</span><span class="p">]]</span> |
| <a id="__codelineno-45-14" name="__codelineno-45-14" href="#__codelineno-45-14"></a><span class="n">max_snapshot_age_in_ms</span><span class="p">:</span> <span class="p">[[</span><span class="n">null</span><span class="p">,</span><span class="mi">604800000</span><span class="p">]]</span> |
| </code></pre></div> |
| <h3 id="manifests">Manifests<a class="headerlink" href="#manifests" title="Permanent link">¶</a></h3> |
| <p>To show a table's current file manifests:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-46-1" name="__codelineno-46-1" href="#__codelineno-46-1"></a><span class="n">table</span><span class="o">.</span><span class="n">inspect</span><span class="o">.</span><span class="n">manifests</span><span class="p">()</span> |
| </code></pre></div> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-47-1" name="__codelineno-47-1" href="#__codelineno-47-1"></a><span class="n">pyarrow</span><span class="o">.</span><span class="n">Table</span> |
| <a id="__codelineno-47-2" name="__codelineno-47-2" href="#__codelineno-47-2"></a><span class="n">content</span><span class="p">:</span> <span class="n">int8</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-47-3" name="__codelineno-47-3" href="#__codelineno-47-3"></a><span class="n">path</span><span class="p">:</span> <span class="n">string</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-47-4" name="__codelineno-47-4" href="#__codelineno-47-4"></a><span class="n">length</span><span class="p">:</span> <span class="n">int64</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-47-5" name="__codelineno-47-5" href="#__codelineno-47-5"></a><span class="n">partition_spec_id</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-47-6" name="__codelineno-47-6" href="#__codelineno-47-6"></a><span class="n">added_snapshot_id</span><span class="p">:</span> <span class="n">int64</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-47-7" name="__codelineno-47-7" href="#__codelineno-47-7"></a><span class="n">added_data_files_count</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-47-8" name="__codelineno-47-8" href="#__codelineno-47-8"></a><span class="n">existing_data_files_count</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-47-9" name="__codelineno-47-9" href="#__codelineno-47-9"></a><span class="n">deleted_data_files_count</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-47-10" name="__codelineno-47-10" href="#__codelineno-47-10"></a><span class="n">added_delete_files_count</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-47-11" name="__codelineno-47-11" href="#__codelineno-47-11"></a><span class="n">existing_delete_files_count</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-47-12" name="__codelineno-47-12" href="#__codelineno-47-12"></a><span class="n">deleted_delete_files_count</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-47-13" name="__codelineno-47-13" href="#__codelineno-47-13"></a><span class="n">partition_summaries</span><span class="p">:</span> <span class="nb">list</span><span class="o">&lt;</span><span class="n">item</span><span class="p">:</span> <span class="n">struct</span><span class="o">&lt;</span><span class="n">contains_null</span><span class="p">:</span> <span class="nb">bool</span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">contains_nan</span><span class="p">:</span> <span class="nb">bool</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">string</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">string</span><span class="o">&gt;&gt;</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-47-14" name="__codelineno-47-14" href="#__codelineno-47-14"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="n">struct</span><span class="o">&lt;</span><span class="n">contains_null</span><span class="p">:</span> <span class="nb">bool</span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">contains_nan</span><span class="p">:</span> <span class="nb">bool</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">string</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">string</span><span class="o">&gt;</span> |
| <a id="__codelineno-47-15" name="__codelineno-47-15" href="#__codelineno-47-15"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">contains_null</span><span class="p">:</span> <span class="nb">bool</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-47-16" name="__codelineno-47-16" href="#__codelineno-47-16"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">contains_nan</span><span class="p">:</span> <span class="nb">bool</span> |
| <a id="__codelineno-47-17" name="__codelineno-47-17" href="#__codelineno-47-17"></a> <span class="n">child</span> <span class="mi">2</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">string</span> |
| <a id="__codelineno-47-18" name="__codelineno-47-18" href="#__codelineno-47-18"></a> <span class="n">child</span> <span class="mi">3</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">string</span> |
| <a id="__codelineno-47-19" name="__codelineno-47-19" href="#__codelineno-47-19"></a><span class="o">----</span> |
| <a id="__codelineno-47-20" name="__codelineno-47-20" href="#__codelineno-47-20"></a><span class="n">content</span><span class="p">:</span> <span class="p">[[</span><span class="mi">0</span><span class="p">]]</span> |
| <a id="__codelineno-47-21" name="__codelineno-47-21" href="#__codelineno-47-21"></a><span class="n">path</span><span class="p">:</span> <span class="p">[[</span><span class="s2">"s3://warehouse/default/table_metadata_manifests/metadata/3bf5b4c6-a7a4-4b43-a6ce-ca2b4887945a-m0.avro"</span><span class="p">]]</span> |
| <a id="__codelineno-47-22" name="__codelineno-47-22" href="#__codelineno-47-22"></a><span class="n">length</span><span class="p">:</span> <span class="p">[[</span><span class="mi">6886</span><span class="p">]]</span> |
| <a id="__codelineno-47-23" name="__codelineno-47-23" href="#__codelineno-47-23"></a><span class="n">partition_spec_id</span><span class="p">:</span> <span class="p">[[</span><span class="mi">0</span><span class="p">]]</span> |
| <a id="__codelineno-47-24" name="__codelineno-47-24" href="#__codelineno-47-24"></a><span class="n">added_snapshot_id</span><span class="p">:</span> <span class="p">[[</span><span class="mi">3815834705531553721</span><span class="p">]]</span> |
| <a id="__codelineno-47-25" name="__codelineno-47-25" href="#__codelineno-47-25"></a><span class="n">added_data_files_count</span><span class="p">:</span> <span class="p">[[</span><span class="mi">1</span><span class="p">]]</span> |
| <a id="__codelineno-47-26" name="__codelineno-47-26" href="#__codelineno-47-26"></a><span class="n">existing_data_files_count</span><span class="p">:</span> <span class="p">[[</span><span class="mi">0</span><span class="p">]]</span> |
| <a id="__codelineno-47-27" name="__codelineno-47-27" href="#__codelineno-47-27"></a><span class="n">deleted_data_files_count</span><span class="p">:</span> <span class="p">[[</span><span class="mi">0</span><span class="p">]]</span> |
| <a id="__codelineno-47-28" name="__codelineno-47-28" href="#__codelineno-47-28"></a><span class="n">added_delete_files_count</span><span class="p">:</span> <span class="p">[[</span><span class="mi">0</span><span class="p">]]</span> |
| <a id="__codelineno-47-29" name="__codelineno-47-29" href="#__codelineno-47-29"></a><span class="n">existing_delete_files_count</span><span class="p">:</span> <span class="p">[[</span><span class="mi">0</span><span class="p">]]</span> |
| <a id="__codelineno-47-30" name="__codelineno-47-30" href="#__codelineno-47-30"></a><span class="n">deleted_delete_files_count</span><span class="p">:</span> <span class="p">[[</span><span class="mi">0</span><span class="p">]]</span> |
| <a id="__codelineno-47-31" name="__codelineno-47-31" href="#__codelineno-47-31"></a><span class="n">partition_summaries</span><span class="p">:</span> <span class="p">[[</span> <span class="o">--</span> <span class="n">is_valid</span><span class="p">:</span> <span class="nb">all</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-47-32" name="__codelineno-47-32" href="#__codelineno-47-32"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">0</span> <span class="nb">type</span><span class="p">:</span> <span class="nb">bool</span> |
| <a id="__codelineno-47-33" name="__codelineno-47-33" href="#__codelineno-47-33"></a><span class="p">[</span><span class="n">false</span><span class="p">]</span> |
| <a id="__codelineno-47-34" name="__codelineno-47-34" href="#__codelineno-47-34"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">1</span> <span class="nb">type</span><span class="p">:</span> <span class="nb">bool</span> |
| <a id="__codelineno-47-35" name="__codelineno-47-35" href="#__codelineno-47-35"></a><span class="p">[</span><span class="n">false</span><span class="p">]</span> |
| <a id="__codelineno-47-36" name="__codelineno-47-36" href="#__codelineno-47-36"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">2</span> <span class="nb">type</span><span class="p">:</span> <span class="n">string</span> |
| <a id="__codelineno-47-37" name="__codelineno-47-37" href="#__codelineno-47-37"></a><span class="p">[</span><span class="s2">"test"</span><span class="p">]</span> |
| <a id="__codelineno-47-38" name="__codelineno-47-38" href="#__codelineno-47-38"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">3</span> <span class="nb">type</span><span class="p">:</span> <span class="n">string</span> |
| <a id="__codelineno-47-39" name="__codelineno-47-39" href="#__codelineno-47-39"></a><span class="p">[</span><span class="s2">"test"</span><span class="p">]]]</span> |
| </code></pre></div> |
| <h3 id="metadata-log-entries">Metadata Log Entries<a class="headerlink" href="#metadata-log-entries" title="Permanent link">¶</a></h3> |
| <p>To show a table's metadata log entries:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-48-1" name="__codelineno-48-1" href="#__codelineno-48-1"></a><span class="n">table</span><span class="o">.</span><span class="n">inspect</span><span class="o">.</span><span class="n">metadata_log_entries</span><span class="p">()</span> |
| </code></pre></div> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-49-1" name="__codelineno-49-1" href="#__codelineno-49-1"></a><span class="n">pyarrow</span><span class="o">.</span><span class="n">Table</span> |
| <a id="__codelineno-49-2" name="__codelineno-49-2" href="#__codelineno-49-2"></a><span class="n">timestamp</span><span class="p">:</span> <span class="n">timestamp</span><span class="p">[</span><span class="n">ms</span><span class="p">]</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-49-3" name="__codelineno-49-3" href="#__codelineno-49-3"></a><span class="n">file</span><span class="p">:</span> <span class="n">string</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-49-4" name="__codelineno-49-4" href="#__codelineno-49-4"></a><span class="n">latest_snapshot_id</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-49-5" name="__codelineno-49-5" href="#__codelineno-49-5"></a><span class="n">latest_schema_id</span><span class="p">:</span> <span class="n">int32</span> |
| <a id="__codelineno-49-6" name="__codelineno-49-6" href="#__codelineno-49-6"></a><span class="n">latest_sequence_number</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-49-7" name="__codelineno-49-7" href="#__codelineno-49-7"></a><span class="o">----</span> |
| <a id="__codelineno-49-8" name="__codelineno-49-8" href="#__codelineno-49-8"></a><span class="n">timestamp</span><span class="p">:</span> <span class="p">[[</span><span class="mi">2024</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">28</span> <span class="mi">17</span><span class="p">:</span><span class="mi">03</span><span class="p">:</span><span class="mf">00.214</span><span class="p">,</span><span class="mi">2024</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">28</span> <span class="mi">17</span><span class="p">:</span><span class="mi">03</span><span class="p">:</span><span class="mf">00.352</span><span class="p">,</span><span class="mi">2024</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">28</span> <span class="mi">17</span><span class="p">:</span><span class="mi">03</span><span class="p">:</span><span class="mf">00.445</span><span class="p">,</span><span class="mi">2024</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">28</span> <span class="mi">17</span><span class="p">:</span><span class="mi">03</span><span class="p">:</span><span class="mf">00.498</span><span class="p">]]</span> |
| <a id="__codelineno-49-9" name="__codelineno-49-9" href="#__codelineno-49-9"></a><span class="n">file</span><span class="p">:</span> <span class="p">[[</span><span class="s2">"s3://warehouse/default/table_metadata_log_entries/metadata/00000-0b3b643b-0f3a-4787-83ad-601ba57b7319.metadata.json"</span><span class="p">,</span><span class="s2">"s3://warehouse/default/table_metadata_log_entries/metadata/00001-f74e4b2c-0f89-4f55-822d-23d099fd7d54.metadata.json"</span><span class="p">,</span><span class="s2">"s3://warehouse/default/table_metadata_log_entries/metadata/00002-97e31507-e4d9-4438-aff1-3c0c5304d271.metadata.json"</span><span class="p">,</span><span class="s2">"s3://warehouse/default/table_metadata_log_entries/metadata/00003-6c8b7033-6ad8-4fe4-b64d-d70381aeaddc.metadata.json"</span><span class="p">]]</span> |
| <a id="__codelineno-49-10" name="__codelineno-49-10" href="#__codelineno-49-10"></a><span class="n">latest_snapshot_id</span><span class="p">:</span> <span class="p">[[</span><span class="n">null</span><span class="p">,</span><span class="mi">3958871664825505738</span><span class="p">,</span><span class="mi">1289234307021405706</span><span class="p">,</span><span class="mi">7640277914614648349</span><span class="p">]]</span> |
| <a id="__codelineno-49-11" name="__codelineno-49-11" href="#__codelineno-49-11"></a><span class="n">latest_schema_id</span><span class="p">:</span> <span class="p">[[</span><span class="n">null</span><span class="p">,</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">]]</span> |
| <a id="__codelineno-49-12" name="__codelineno-49-12" href="#__codelineno-49-12"></a><span class="n">latest_sequence_number</span><span class="p">:</span> <span class="p">[[</span><span class="n">null</span><span class="p">,</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">]]</span> |
| </code></pre></div> |
| <h3 id="history">History<a class="headerlink" href="#history" title="Permanent link">¶</a></h3> |
| <p>To show a table's history:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-50-1" name="__codelineno-50-1" href="#__codelineno-50-1"></a><span class="n">table</span><span class="o">.</span><span class="n">inspect</span><span class="o">.</span><span class="n">history</span><span class="p">()</span> |
| </code></pre></div> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-51-1" name="__codelineno-51-1" href="#__codelineno-51-1"></a><span class="n">pyarrow</span><span class="o">.</span><span class="n">Table</span> |
| <a id="__codelineno-51-2" name="__codelineno-51-2" href="#__codelineno-51-2"></a><span class="n">made_current_at</span><span class="p">:</span> <span class="n">timestamp</span><span class="p">[</span><span class="n">ms</span><span class="p">]</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-51-3" name="__codelineno-51-3" href="#__codelineno-51-3"></a><span class="n">snapshot_id</span><span class="p">:</span> <span class="n">int64</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-51-4" name="__codelineno-51-4" href="#__codelineno-51-4"></a><span class="n">parent_id</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-51-5" name="__codelineno-51-5" href="#__codelineno-51-5"></a><span class="n">is_current_ancestor</span><span class="p">:</span> <span class="nb">bool</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-51-6" name="__codelineno-51-6" href="#__codelineno-51-6"></a><span class="o">----</span> |
| <a id="__codelineno-51-7" name="__codelineno-51-7" href="#__codelineno-51-7"></a><span class="n">made_current_at</span><span class="p">:</span> <span class="p">[[</span><span class="mi">2024</span><span class="o">-</span><span class="mi">06</span><span class="o">-</span><span class="mi">18</span> <span class="mi">16</span><span class="p">:</span><span class="mi">17</span><span class="p">:</span><span class="mf">48.768</span><span class="p">,</span><span class="mi">2024</span><span class="o">-</span><span class="mi">06</span><span class="o">-</span><span class="mi">18</span> <span class="mi">16</span><span class="p">:</span><span class="mi">17</span><span class="p">:</span><span class="mf">49.240</span><span class="p">,</span><span class="mi">2024</span><span class="o">-</span><span class="mi">06</span><span class="o">-</span><span class="mi">18</span> <span class="mi">16</span><span class="p">:</span><span class="mi">17</span><span class="p">:</span><span class="mf">49.343</span><span class="p">,</span><span class="mi">2024</span><span class="o">-</span><span class="mi">06</span><span class="o">-</span><span class="mi">18</span> <span class="mi">16</span><span class="p">:</span><span class="mi">17</span><span class="p">:</span><span class="mf">49.511</span><span class="p">]]</span> |
| <a id="__codelineno-51-8" name="__codelineno-51-8" href="#__codelineno-51-8"></a><span class="n">snapshot_id</span><span class="p">:</span> <span class="p">[[</span><span class="mi">4358109269873137077</span><span class="p">,</span><span class="mi">3380769165026943338</span><span class="p">,</span><span class="mi">4358109269873137077</span><span class="p">,</span><span class="mi">3089420140651211776</span><span class="p">]]</span> |
| <a id="__codelineno-51-9" name="__codelineno-51-9" href="#__codelineno-51-9"></a><span class="n">parent_id</span><span class="p">:</span> <span class="p">[[</span><span class="n">null</span><span class="p">,</span><span class="mi">4358109269873137077</span><span class="p">,</span><span class="n">null</span><span class="p">,</span><span class="mi">4358109269873137077</span><span class="p">]]</span> |
| <a id="__codelineno-51-10" name="__codelineno-51-10" href="#__codelineno-51-10"></a><span class="n">is_current_ancestor</span><span class="p">:</span> <span class="p">[[</span><span class="n">true</span><span class="p">,</span><span class="n">false</span><span class="p">,</span><span class="n">true</span><span class="p">,</span><span class="n">true</span><span class="p">]]</span> |
| </code></pre></div> |
| <h3 id="files">Files<a class="headerlink" href="#files" title="Permanent link">¶</a></h3> |
| <p>Inspect the data files in the current snapshot of the table:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-52-1" name="__codelineno-52-1" href="#__codelineno-52-1"></a><span class="n">table</span><span class="o">.</span><span class="n">inspect</span><span class="o">.</span><span class="n">files</span><span class="p">()</span> |
| </code></pre></div> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-53-1" name="__codelineno-53-1" href="#__codelineno-53-1"></a><span class="n">pyarrow</span><span class="o">.</span><span class="n">Table</span> |
| <a id="__codelineno-53-2" name="__codelineno-53-2" href="#__codelineno-53-2"></a><span class="n">content</span><span class="p">:</span> <span class="n">int8</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-3" name="__codelineno-53-3" href="#__codelineno-53-3"></a><span class="n">file_path</span><span class="p">:</span> <span class="n">string</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-4" name="__codelineno-53-4" href="#__codelineno-53-4"></a><span class="n">file_format</span><span class="p">:</span> <span class="n">dictionary</span><span class="o"><</span><span class="n">values</span><span class="o">=</span><span class="n">string</span><span class="p">,</span> <span class="n">indices</span><span class="o">=</span><span class="n">int32</span><span class="p">,</span> <span class="n">ordered</span><span class="o">=</span><span class="mi">0</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-5" name="__codelineno-53-5" href="#__codelineno-53-5"></a><span class="n">spec_id</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-6" name="__codelineno-53-6" href="#__codelineno-53-6"></a><span class="n">record_count</span><span class="p">:</span> <span class="n">int64</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-7" name="__codelineno-53-7" href="#__codelineno-53-7"></a><span class="n">file_size_in_bytes</span><span class="p">:</span> <span class="n">int64</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-8" name="__codelineno-53-8" href="#__codelineno-53-8"></a><span class="n">column_sizes</span><span class="p">:</span> <span class="nb">map</span><span class="o"><</span><span class="n">int32</span><span class="p">,</span> <span class="n">int64</span><span class="o">></span> |
| <a id="__codelineno-53-9" name="__codelineno-53-9" href="#__codelineno-53-9"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">entries</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">int64</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-10" name="__codelineno-53-10" href="#__codelineno-53-10"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-11" name="__codelineno-53-11" href="#__codelineno-53-11"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-12" name="__codelineno-53-12" href="#__codelineno-53-12"></a><span class="n">value_counts</span><span class="p">:</span> <span class="nb">map</span><span class="o"><</span><span class="n">int32</span><span class="p">,</span> <span class="n">int64</span><span class="o">></span> |
| <a id="__codelineno-53-13" name="__codelineno-53-13" href="#__codelineno-53-13"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">entries</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">int64</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-14" name="__codelineno-53-14" href="#__codelineno-53-14"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-15" name="__codelineno-53-15" href="#__codelineno-53-15"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-16" name="__codelineno-53-16" href="#__codelineno-53-16"></a><span class="n">null_value_counts</span><span class="p">:</span> <span class="nb">map</span><span class="o"><</span><span class="n">int32</span><span class="p">,</span> <span class="n">int64</span><span class="o">></span> |
| <a id="__codelineno-53-17" name="__codelineno-53-17" href="#__codelineno-53-17"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">entries</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">int64</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-18" name="__codelineno-53-18" href="#__codelineno-53-18"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-19" name="__codelineno-53-19" href="#__codelineno-53-19"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-20" name="__codelineno-53-20" href="#__codelineno-53-20"></a><span class="n">nan_value_counts</span><span class="p">:</span> <span class="nb">map</span><span class="o"><</span><span class="n">int32</span><span class="p">,</span> <span class="n">int64</span><span class="o">></span> |
| <a id="__codelineno-53-21" name="__codelineno-53-21" href="#__codelineno-53-21"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">entries</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">int64</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-22" name="__codelineno-53-22" href="#__codelineno-53-22"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-23" name="__codelineno-53-23" href="#__codelineno-53-23"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-24" name="__codelineno-53-24" href="#__codelineno-53-24"></a><span class="n">lower_bounds</span><span class="p">:</span> <span class="nb">map</span><span class="o"><</span><span class="n">int32</span><span class="p">,</span> <span class="n">binary</span><span class="o">></span> |
| <a id="__codelineno-53-25" name="__codelineno-53-25" href="#__codelineno-53-25"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">entries</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">binary</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-26" name="__codelineno-53-26" href="#__codelineno-53-26"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-27" name="__codelineno-53-27" href="#__codelineno-53-27"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">binary</span> |
| <a id="__codelineno-53-28" name="__codelineno-53-28" href="#__codelineno-53-28"></a><span class="n">upper_bounds</span><span class="p">:</span> <span class="nb">map</span><span class="o"><</span><span class="n">int32</span><span class="p">,</span> <span class="n">binary</span><span class="o">></span> |
| <a id="__codelineno-53-29" name="__codelineno-53-29" href="#__codelineno-53-29"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">entries</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">binary</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-30" name="__codelineno-53-30" href="#__codelineno-53-30"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">key</span><span class="p">:</span> <span class="n">int32</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-31" name="__codelineno-53-31" href="#__codelineno-53-31"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">value</span><span class="p">:</span> <span class="n">binary</span> |
| <a id="__codelineno-53-32" name="__codelineno-53-32" href="#__codelineno-53-32"></a><span class="n">key_metadata</span><span class="p">:</span> <span class="n">binary</span> |
| <a id="__codelineno-53-33" name="__codelineno-53-33" href="#__codelineno-53-33"></a><span class="n">split_offsets</span><span class="p">:</span> <span class="nb">list</span><span class="o"><</span><span class="n">item</span><span class="p">:</span> <span class="n">int64</span><span class="o">></span> |
| <a id="__codelineno-53-34" name="__codelineno-53-34" href="#__codelineno-53-34"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-35" name="__codelineno-53-35" href="#__codelineno-53-35"></a><span class="n">equality_ids</span><span class="p">:</span> <span class="nb">list</span><span class="o"><</span><span class="n">item</span><span class="p">:</span> <span class="n">int32</span><span class="o">></span> |
| <a id="__codelineno-53-36" name="__codelineno-53-36" href="#__codelineno-53-36"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">item</span><span class="p">:</span> <span class="n">int32</span> |
| <a id="__codelineno-53-37" name="__codelineno-53-37" href="#__codelineno-53-37"></a><span class="n">sort_order_id</span><span class="p">:</span> <span class="n">int32</span> |
| <a id="__codelineno-53-38" name="__codelineno-53-38" href="#__codelineno-53-38"></a><span class="n">readable_metrics</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">city</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">large_string</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">large_string</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">lat</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">double</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">double</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span><span class="p">,</span> <span class="n">long</span><span 
class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">double</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">double</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span><span class="o">></span> |
| <a id="__codelineno-53-39" name="__codelineno-53-39" href="#__codelineno-53-39"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">city</span><span class="p">:</span> <span class="n">struct</span><span class="o"><</span><span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">large_string</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">large_string</span><span class="o">></span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-40" name="__codelineno-53-40" href="#__codelineno-53-40"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-41" name="__codelineno-53-41" href="#__codelineno-53-41"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-42" name="__codelineno-53-42" href="#__codelineno-53-42"></a> <span class="n">child</span> <span class="mi">2</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-43" name="__codelineno-53-43" href="#__codelineno-53-43"></a> <span class="n">child</span> <span class="mi">3</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-44" name="__codelineno-53-44" href="#__codelineno-53-44"></a> <span class="n">child</span> <span class="mi">4</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">large_string</span> |
| <a id="__codelineno-53-45" name="__codelineno-53-45" href="#__codelineno-53-45"></a> <span class="n">child</span> <span class="mi">5</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">large_string</span> |
| <a id="__codelineno-53-46" name="__codelineno-53-46" href="#__codelineno-53-46"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">lat</span><span class="p">:</span> <span class="n">struct</span><span class="o">&lt;</span><span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">double</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">double</span><span class="o">&gt;</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-47" name="__codelineno-53-47" href="#__codelineno-53-47"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-48" name="__codelineno-53-48" href="#__codelineno-53-48"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-49" name="__codelineno-53-49" href="#__codelineno-53-49"></a> <span class="n">child</span> <span class="mi">2</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-50" name="__codelineno-53-50" href="#__codelineno-53-50"></a> <span class="n">child</span> <span class="mi">3</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-51" name="__codelineno-53-51" href="#__codelineno-53-51"></a> <span class="n">child</span> <span class="mi">4</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-53-52" name="__codelineno-53-52" href="#__codelineno-53-52"></a> <span class="n">child</span> <span class="mi">5</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-53-53" name="__codelineno-53-53" href="#__codelineno-53-53"></a> <span class="n">child</span> <span class="mi">2</span><span class="p">,</span> <span class="n">long</span><span class="p">:</span> <span class="n">struct</span><span class="o">&lt;</span><span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">double</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">double</span><span class="o">&gt;</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-54" name="__codelineno-53-54" href="#__codelineno-53-54"></a> <span class="n">child</span> <span class="mi">0</span><span class="p">,</span> <span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-55" name="__codelineno-53-55" href="#__codelineno-53-55"></a> <span class="n">child</span> <span class="mi">1</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-56" name="__codelineno-53-56" href="#__codelineno-53-56"></a> <span class="n">child</span> <span class="mi">2</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-57" name="__codelineno-53-57" href="#__codelineno-53-57"></a> <span class="n">child</span> <span class="mi">3</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-58" name="__codelineno-53-58" href="#__codelineno-53-58"></a> <span class="n">child</span> <span class="mi">4</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-53-59" name="__codelineno-53-59" href="#__codelineno-53-59"></a> <span class="n">child</span> <span class="mi">5</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-53-60" name="__codelineno-53-60" href="#__codelineno-53-60"></a><span class="o">----</span> |
| <a id="__codelineno-53-61" name="__codelineno-53-61" href="#__codelineno-53-61"></a><span class="n">content</span><span class="p">:</span> <span class="p">[[</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">]]</span> |
| <a id="__codelineno-53-62" name="__codelineno-53-62" href="#__codelineno-53-62"></a><span class="n">file_path</span><span class="p">:</span> <span class="p">[[</span><span class="s2">"s3://warehouse/default/table_metadata_files/data/00000-0-9ea7d222-6457-467f-bad5-6fb125c9aa5f.parquet"</span><span class="p">,</span><span class="s2">"s3://warehouse/default/table_metadata_files/data/00000-0-afa8893c-de71-4710-97c9-6b01590d0c44.parquet"</span><span class="p">]]</span> |
| <a id="__codelineno-53-63" name="__codelineno-53-63" href="#__codelineno-53-63"></a><span class="n">file_format</span><span class="p">:</span> <span class="p">[[</span><span class="s2">"PARQUET"</span><span class="p">,</span><span class="s2">"PARQUET"</span><span class="p">]]</span> |
| <a id="__codelineno-53-64" name="__codelineno-53-64" href="#__codelineno-53-64"></a><span class="n">spec_id</span><span class="p">:</span> <span class="p">[[</span><span class="mi">0</span><span class="p">,</span><span class="mi">0</span><span class="p">]]</span> |
| <a id="__codelineno-53-65" name="__codelineno-53-65" href="#__codelineno-53-65"></a><span class="n">record_count</span><span class="p">:</span> <span class="p">[[</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">]]</span> |
| <a id="__codelineno-53-66" name="__codelineno-53-66" href="#__codelineno-53-66"></a><span class="n">file_size_in_bytes</span><span class="p">:</span> <span class="p">[[</span><span class="mi">5459</span><span class="p">,</span><span class="mi">5459</span><span class="p">]]</span> |
| <a id="__codelineno-53-67" name="__codelineno-53-67" href="#__codelineno-53-67"></a><span class="n">column_sizes</span><span class="p">:</span> <span class="p">[[</span><span class="n">keys</span><span class="p">:[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">4</span><span class="p">,</span><span class="mi">5</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">8</span><span class="p">,</span><span class="mi">9</span><span class="p">,</span><span class="mi">10</span><span class="p">,</span><span class="mi">11</span><span class="p">,</span><span class="mi">12</span><span class="p">]</span><span class="n">values</span><span class="p">:[</span><span class="mi">49</span><span class="p">,</span><span class="mi">78</span><span class="p">,</span><span class="mi">128</span><span class="p">,</span><span class="mi">94</span><span class="p">,</span><span class="mi">118</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">118</span><span class="p">,</span><span class="mi">118</span><span class="p">,</span><span class="mi">94</span><span class="p">,</span><span class="mi">78</span><span class="p">,</span><span class="mi">109</span><span class="p">],</span><span class="n">keys</span><span class="p">:[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">4</span><span class="p">,</span><span class="mi">5</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">8</span><span class="p">,</span><span class="mi">9</span><span class="p">,</span><span class="mi">10</span><span class="p">,</span><span class="mi">11</span><span class="p">,</span><span class="mi">12</span><span class="p">]</span><span 
class="n">values</span><span class="p">:[</span><span class="mi">49</span><span class="p">,</span><span class="mi">78</span><span class="p">,</span><span class="mi">128</span><span class="p">,</span><span class="mi">94</span><span class="p">,</span><span class="mi">118</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">118</span><span class="p">,</span><span class="mi">118</span><span class="p">,</span><span class="mi">94</span><span class="p">,</span><span class="mi">78</span><span class="p">,</span><span class="mi">109</span><span class="p">]]]</span> |
| <a id="__codelineno-53-68" name="__codelineno-53-68" href="#__codelineno-53-68"></a><span class="n">value_counts</span><span class="p">:</span> <span class="p">[[</span><span class="n">keys</span><span class="p">:[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">4</span><span class="p">,</span><span class="mi">5</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">8</span><span class="p">,</span><span class="mi">9</span><span class="p">,</span><span class="mi">10</span><span class="p">,</span><span class="mi">11</span><span class="p">,</span><span class="mi">12</span><span class="p">]</span><span class="n">values</span><span class="p">:[</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">],</span><span class="n">keys</span><span class="p">:[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">4</span><span class="p">,</span><span class="mi">5</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">8</span><span class="p">,</span><span class="mi">9</span><span class="p">,</span><span class="mi">10</span><span class="p">,</span><span class="mi">11</span><span class="p">,</span><span class="mi">12</span><span class="p">]</span><span class="n">values</span><span 
class="p">:[</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">3</span><span class="p">]]]</span> |
| <a id="__codelineno-53-69" name="__codelineno-53-69" href="#__codelineno-53-69"></a><span class="n">null_value_counts</span><span class="p">:</span> <span class="p">[[</span><span class="n">keys</span><span class="p">:[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">4</span><span class="p">,</span><span class="mi">5</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">8</span><span class="p">,</span><span class="mi">9</span><span class="p">,</span><span class="mi">10</span><span class="p">,</span><span class="mi">11</span><span class="p">,</span><span class="mi">12</span><span class="p">]</span><span class="n">values</span><span class="p">:[</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">],</span><span class="n">keys</span><span class="p">:[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">4</span><span class="p">,</span><span class="mi">5</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">8</span><span class="p">,</span><span class="mi">9</span><span class="p">,</span><span class="mi">10</span><span class="p">,</span><span class="mi">11</span><span class="p">,</span><span class="mi">12</span><span class="p">]</span><span class="n">values</span><span 
class="p">:[</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">]]]</span> |
| <a id="__codelineno-53-70" name="__codelineno-53-70" href="#__codelineno-53-70"></a><span class="n">nan_value_counts</span><span class="p">:</span> <span class="p">[[</span><span class="n">keys</span><span class="p">:[]</span><span class="n">values</span><span class="p">:[],</span><span class="n">keys</span><span class="p">:[]</span><span class="n">values</span><span class="p">:[]]]</span> |
| <a id="__codelineno-53-71" name="__codelineno-53-71" href="#__codelineno-53-71"></a><span class="n">lower_bounds</span><span class="p">:</span> <span class="p">[[</span><span class="n">keys</span><span class="p">:[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">4</span><span class="p">,</span><span class="mi">5</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">8</span><span class="p">,</span><span class="mi">9</span><span class="p">,</span><span class="mi">10</span><span class="p">,</span><span class="mi">11</span><span class="p">,</span><span class="mi">12</span><span class="p">]</span><span class="n">values</span><span class="p">:[</span><span class="mi">00</span><span class="p">,</span><span class="mi">61</span><span class="p">,</span><span class="mi">61616161616161616161616161616161</span><span class="p">,</span><span class="mi">01000000</span><span class="p">,</span><span class="mi">0100000000000000</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">009</span><span class="n">B6ACA38F10500</span><span class="p">,</span><span class="mi">009</span><span class="n">B6ACA38F10500</span><span class="p">,</span><span class="mf">9E4</span><span class="n">B0000</span><span class="p">,</span><span class="mi">01</span><span class="p">,</span><span class="mi">00000000000000000000000000000000</span><span class="p">],</span><span class="n">keys</span><span class="p">:[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">4</span><span class="p">,</span><span class="mi">5</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">8</span><span class="p">,</span><span class="mi">9</span><span 
class="p">,</span><span class="mi">10</span><span class="p">,</span><span class="mi">11</span><span class="p">,</span><span class="mi">12</span><span class="p">]</span><span class="n">values</span><span class="p">:[</span><span class="mi">00</span><span class="p">,</span><span class="mi">61</span><span class="p">,</span><span class="mi">61616161616161616161616161616161</span><span class="p">,</span><span class="mi">01000000</span><span class="p">,</span><span class="mi">0100000000000000</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">009</span><span class="n">B6ACA38F10500</span><span class="p">,</span><span class="mi">009</span><span class="n">B6ACA38F10500</span><span class="p">,</span><span class="mf">9E4</span><span class="n">B0000</span><span class="p">,</span><span class="mi">01</span><span class="p">,</span><span class="mi">00000000000000000000000000000000</span><span class="p">]]]</span> |
| <a id="__codelineno-53-72" name="__codelineno-53-72" href="#__codelineno-53-72"></a><span class="n">upper_bounds</span><span class="p">:[[</span><span class="n">keys</span><span class="p">:[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">4</span><span class="p">,</span><span class="mi">5</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">8</span><span class="p">,</span><span class="mi">9</span><span class="p">,</span><span class="mi">10</span><span class="p">,</span><span class="mi">11</span><span class="p">,</span><span class="mi">12</span><span class="p">]</span><span class="n">values</span><span class="p">:[</span><span class="mi">00</span><span class="p">,</span><span class="mi">61</span><span class="p">,</span><span class="mi">61616161616161616161616161616161</span><span class="p">,</span><span class="mi">01000000</span><span class="p">,</span><span class="mi">0100000000000000</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">009</span><span class="n">B6ACA38F10500</span><span class="p">,</span><span class="mi">009</span><span class="n">B6ACA38F10500</span><span class="p">,</span><span class="mf">9E4</span><span class="n">B0000</span><span class="p">,</span><span class="mi">01</span><span class="p">,</span><span class="mi">00000000000000000000000000000000</span><span class="p">],</span><span class="n">keys</span><span class="p">:[</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">3</span><span class="p">,</span><span class="mi">4</span><span class="p">,</span><span class="mi">5</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">8</span><span class="p">,</span><span class="mi">9</span><span class="p">,</span><span 
class="mi">10</span><span class="p">,</span><span class="mi">11</span><span class="p">,</span><span class="mi">12</span><span class="p">]</span><span class="n">values</span><span class="p">:[</span><span class="mi">00</span><span class="p">,</span><span class="mi">61</span><span class="p">,</span><span class="mi">61616161616161616161616161616161</span><span class="p">,</span><span class="mi">01000000</span><span class="p">,</span><span class="mi">0100000000000000</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">009</span><span class="n">B6ACA38F10500</span><span class="p">,</span><span class="mi">009</span><span class="n">B6ACA38F10500</span><span class="p">,</span><span class="mf">9E4</span><span class="n">B0000</span><span class="p">,</span><span class="mi">01</span><span class="p">,</span><span class="mi">00000000000000000000000000000000</span><span class="p">]]]</span> |
| <a id="__codelineno-53-73" name="__codelineno-53-73" href="#__codelineno-53-73"></a><span class="n">key_metadata</span><span class="p">:</span> <span class="p">[[</span><span class="mi">0100</span><span class="p">,</span><span class="mi">0100</span><span class="p">]]</span> |
| <a id="__codelineno-53-74" name="__codelineno-53-74" href="#__codelineno-53-74"></a><span class="n">split_offsets</span><span class="p">:[[[],[]]]</span> |
| <a id="__codelineno-53-75" name="__codelineno-53-75" href="#__codelineno-53-75"></a><span class="n">equality_ids</span><span class="p">:[[[],[]]]</span> |
| <a id="__codelineno-53-76" name="__codelineno-53-76" href="#__codelineno-53-76"></a><span class="n">sort_order_id</span><span class="p">:[[[],[]]]</span> |
| <a id="__codelineno-53-77" name="__codelineno-53-77" href="#__codelineno-53-77"></a><span class="n">readable_metrics</span><span class="p">:</span> <span class="p">[</span> |
| <a id="__codelineno-53-78" name="__codelineno-53-78" href="#__codelineno-53-78"></a> <span class="o">--</span> <span class="n">is_valid</span><span class="p">:</span> <span class="nb">all</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-79" name="__codelineno-53-79" href="#__codelineno-53-79"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">0</span> <span class="nb">type</span><span class="p">:</span> <span class="n">struct</span><span class="o">&lt;</span><span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">large_string</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">large_string</span><span class="o">&gt;</span> |
| <a id="__codelineno-53-80" name="__codelineno-53-80" href="#__codelineno-53-80"></a> <span class="o">--</span> <span class="n">is_valid</span><span class="p">:</span> <span class="nb">all</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-81" name="__codelineno-53-81" href="#__codelineno-53-81"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">0</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-82" name="__codelineno-53-82" href="#__codelineno-53-82"></a><span class="p">[</span><span class="mi">140</span><span class="p">]</span> |
| <a id="__codelineno-53-83" name="__codelineno-53-83" href="#__codelineno-53-83"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">1</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-84" name="__codelineno-53-84" href="#__codelineno-53-84"></a><span class="p">[</span><span class="mi">4</span><span class="p">]</span> |
| <a id="__codelineno-53-85" name="__codelineno-53-85" href="#__codelineno-53-85"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">2</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-86" name="__codelineno-53-86" href="#__codelineno-53-86"></a><span class="p">[</span><span class="mi">0</span><span class="p">]</span> |
| <a id="__codelineno-53-87" name="__codelineno-53-87" href="#__codelineno-53-87"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">3</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-88" name="__codelineno-53-88" href="#__codelineno-53-88"></a><span class="p">[</span><span class="n">null</span><span class="p">]</span> |
| <a id="__codelineno-53-89" name="__codelineno-53-89" href="#__codelineno-53-89"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">4</span> <span class="nb">type</span><span class="p">:</span> <span class="n">large_string</span> |
| <a id="__codelineno-53-90" name="__codelineno-53-90" href="#__codelineno-53-90"></a><span class="p">[</span><span class="s2">"Amsterdam"</span><span class="p">]</span> |
| <a id="__codelineno-53-91" name="__codelineno-53-91" href="#__codelineno-53-91"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">5</span> <span class="nb">type</span><span class="p">:</span> <span class="n">large_string</span> |
| <a id="__codelineno-53-92" name="__codelineno-53-92" href="#__codelineno-53-92"></a><span class="p">[</span><span class="s2">"San Francisco"</span><span class="p">]</span> |
| <a id="__codelineno-53-93" name="__codelineno-53-93" href="#__codelineno-53-93"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">1</span> <span class="nb">type</span><span class="p">:</span> <span class="n">struct</span><span class="o">&lt;</span><span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">double</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">double</span><span class="o">&gt;</span> |
| <a id="__codelineno-53-94" name="__codelineno-53-94" href="#__codelineno-53-94"></a> <span class="o">--</span> <span class="n">is_valid</span><span class="p">:</span> <span class="nb">all</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-95" name="__codelineno-53-95" href="#__codelineno-53-95"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">0</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-96" name="__codelineno-53-96" href="#__codelineno-53-96"></a><span class="p">[</span><span class="mi">135</span><span class="p">]</span> |
| <a id="__codelineno-53-97" name="__codelineno-53-97" href="#__codelineno-53-97"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">1</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-98" name="__codelineno-53-98" href="#__codelineno-53-98"></a><span class="p">[</span><span class="mi">4</span><span class="p">]</span> |
| <a id="__codelineno-53-99" name="__codelineno-53-99" href="#__codelineno-53-99"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">2</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-100" name="__codelineno-53-100" href="#__codelineno-53-100"></a><span class="p">[</span><span class="mi">0</span><span class="p">]</span> |
| <a id="__codelineno-53-101" name="__codelineno-53-101" href="#__codelineno-53-101"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">3</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-102" name="__codelineno-53-102" href="#__codelineno-53-102"></a><span class="p">[</span><span class="n">null</span><span class="p">]</span> |
| <a id="__codelineno-53-103" name="__codelineno-53-103" href="#__codelineno-53-103"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">4</span> <span class="nb">type</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-53-104" name="__codelineno-53-104" href="#__codelineno-53-104"></a><span class="p">[</span><span class="mf">37.773972</span><span class="p">]</span> |
| <a id="__codelineno-53-105" name="__codelineno-53-105" href="#__codelineno-53-105"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">5</span> <span class="nb">type</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-53-106" name="__codelineno-53-106" href="#__codelineno-53-106"></a><span class="p">[</span><span class="mf">53.11254</span><span class="p">]</span> |
| <a id="__codelineno-53-107" name="__codelineno-53-107" href="#__codelineno-53-107"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">2</span> <span class="nb">type</span><span class="p">:</span> <span class="n">struct</span><span class="o">&lt;</span><span class="n">column_size</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">null_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">nan_value_count</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> <span class="n">lower_bound</span><span class="p">:</span> <span class="n">double</span><span class="p">,</span> <span class="n">upper_bound</span><span class="p">:</span> <span class="n">double</span><span class="o">&gt;</span> |
| <a id="__codelineno-53-108" name="__codelineno-53-108" href="#__codelineno-53-108"></a> <span class="o">--</span> <span class="n">is_valid</span><span class="p">:</span> <span class="nb">all</span> <span class="ow">not</span> <span class="n">null</span> |
| <a id="__codelineno-53-109" name="__codelineno-53-109" href="#__codelineno-53-109"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">0</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-110" name="__codelineno-53-110" href="#__codelineno-53-110"></a><span class="p">[</span><span class="mi">135</span><span class="p">]</span> |
| <a id="__codelineno-53-111" name="__codelineno-53-111" href="#__codelineno-53-111"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">1</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-112" name="__codelineno-53-112" href="#__codelineno-53-112"></a><span class="p">[</span><span class="mi">4</span><span class="p">]</span> |
| <a id="__codelineno-53-113" name="__codelineno-53-113" href="#__codelineno-53-113"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">2</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-114" name="__codelineno-53-114" href="#__codelineno-53-114"></a><span class="p">[</span><span class="mi">0</span><span class="p">]</span> |
| <a id="__codelineno-53-115" name="__codelineno-53-115" href="#__codelineno-53-115"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">3</span> <span class="nb">type</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-53-116" name="__codelineno-53-116" href="#__codelineno-53-116"></a><span class="p">[</span><span class="n">null</span><span class="p">]</span> |
| <a id="__codelineno-53-117" name="__codelineno-53-117" href="#__codelineno-53-117"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">4</span> <span class="nb">type</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-53-118" name="__codelineno-53-118" href="#__codelineno-53-118"></a><span class="p">[</span><span class="o">-</span><span class="mf">122.431297</span><span class="p">]</span> |
| <a id="__codelineno-53-119" name="__codelineno-53-119" href="#__codelineno-53-119"></a> <span class="o">--</span> <span class="n">child</span> <span class="mi">5</span> <span class="nb">type</span><span class="p">:</span> <span class="n">double</span> |
| <a id="__codelineno-53-120" name="__codelineno-53-120" href="#__codelineno-53-120"></a><span class="p">[</span><span class="mf">6.0989</span><span class="p">]]</span> |
| </code></pre></div> |
| <div class="admonition info"> |
| <p class="admonition-title">Info</p> |
| <p>Content refers to the type of content stored by the data file: <code>0</code> - <code>Data</code>, <code>1</code> - <code>Position Deletes</code>, <code>2</code> - <code>Equality Deletes</code></p> |
| </div> |
| <p>To show only data files or delete files in the current snapshot, use <code>table.inspect.data_files()</code> and <code>table.inspect.delete_files()</code> respectively.</p> |
| <h2 id="add-files">Add Files<a class="headerlink" href="#add-files" title="Permanent link">¶</a></h2> |
| <p>Expert Iceberg users may choose to commit existing parquet files to the Iceberg table as data files, without rewriting them.</p> |
| <!-- prettier-ignore-start --> |
| |
| <div class="admonition note"> |
| <p class="admonition-title">Name Mapping and Field IDs</p> |
| <p><code>add_files</code> can work with Parquet files both with and without field IDs in their metadata:</p> |
| <ul> |
| <li><strong>Files with field IDs</strong>: When field IDs are present in the Parquet metadata, they must match the corresponding field IDs in the Iceberg table schema. This is common for files generated by tools like Spark or by other libraries that write explicit field ID metadata.</li> |
| <li><strong>Files without field IDs</strong>: When field IDs are absent, the table must have a <a href="https://iceberg.apache.org/spec/?h=name+mapping#name-mapping-serialization">Name Mapping</a> to map field names to Iceberg field IDs. <code>add_files</code> will automatically create a Name Mapping based on the table's current schema if one doesn't already exist.</li> |
| </ul> |
| </div> |
| <div class="admonition note"> |
| <p class="admonition-title">Partitions</p> |
| <p><code>add_files</code> only requires the client to read the existing parquet files' metadata footer to infer the partition value of each file. This implementation also supports adding files to Iceberg tables with partition transforms like <code>MonthTransform</code> and <code>TruncateTransform</code>, which preserve the order of the values after the transformation (any transform that has the <code>preserves_order</code> property set to <code>True</code> is supported). Please note that if the column statistics of the <code>PartitionField</code>'s source column are not present in the parquet metadata, the partition value is inferred as <code>None</code>.</p> |
| </div> |
| <div class="admonition warning"> |
| <p class="admonition-title">Maintenance Operations</p> |
| <p>Because <code>add_files</code> commits the existing parquet files to the Iceberg Table as any other data file, destructive maintenance operations like expiring snapshots will remove them.</p> |
| </div> |
| <div class="admonition warning"> |
| <p class="admonition-title">Check Duplicate Files</p> |
| <p>The <code>check_duplicate_files</code> parameter determines whether the method validates that the specified <code>file_paths</code> do not already exist in the Iceberg table. When set to True (the default), the method performs a validation against the table’s current data files to prevent accidental duplication, helping to maintain data consistency by ensuring the same file is not added multiple times. While this check is important for data integrity, it can introduce performance overhead for tables with a large number of files. Setting check_duplicate_files=False can improve performance but increases the risk of duplicate files, which may lead to data inconsistencies or table corruption. It is strongly recommended to keep this parameter enabled unless duplicate file handling is strictly enforced elsewhere.</p> |
| </div> |
| <!-- prettier-ignore-end --> |
| |
| <h3 id="usage">Usage<a class="headerlink" href="#usage" title="Permanent link">¶</a></h3> |
| <table> |
| <thead> |
| <tr> |
| <th>Parameter</th> |
| <th>Required?</th> |
| <th>Type</th> |
| <th>Description</th> |
| </tr> |
| </thead> |
| <tbody> |
| <tr> |
| <td><code>file_paths</code></td> |
| <td>✔️</td> |
| <td>List[str]</td> |
| <td>The list of full file paths to be added as data files to the table</td> |
| </tr> |
| <tr> |
| <td><code>snapshot_properties</code></td> |
| <td></td> |
| <td>Dict[str, str]</td> |
| <td>Properties to set for the new snapshot. Defaults to an empty dictionary</td> |
| </tr> |
| <tr> |
| <td><code>check_duplicate_files</code></td> |
| <td></td> |
| <td>bool</td> |
| <td>Whether to check for duplicate files. Defaults to <code>True</code></td> |
| </tr> |
| </tbody> |
| </table> |
| <h3 id="example">Example<a class="headerlink" href="#example" title="Permanent link">¶</a></h3> |
| <p>Add files to Iceberg table:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-54-1" name="__codelineno-54-1" href="#__codelineno-54-1"></a><span class="c1"># Given that these parquet files have schema consistent with the Iceberg table</span> |
| <a id="__codelineno-54-2" name="__codelineno-54-2" href="#__codelineno-54-2"></a> |
| <a id="__codelineno-54-3" name="__codelineno-54-3" href="#__codelineno-54-3"></a><span class="n">file_paths</span> <span class="o">=</span> <span class="p">[</span> |
| <a id="__codelineno-54-4" name="__codelineno-54-4" href="#__codelineno-54-4"></a> <span class="s2">"s3a://warehouse/default/existing-1.parquet"</span><span class="p">,</span> |
| <a id="__codelineno-54-5" name="__codelineno-54-5" href="#__codelineno-54-5"></a> <span class="s2">"s3a://warehouse/default/existing-2.parquet"</span><span class="p">,</span> |
| <a id="__codelineno-54-6" name="__codelineno-54-6" href="#__codelineno-54-6"></a><span class="p">]</span> |
| <a id="__codelineno-54-7" name="__codelineno-54-7" href="#__codelineno-54-7"></a> |
| <a id="__codelineno-54-8" name="__codelineno-54-8" href="#__codelineno-54-8"></a><span class="c1"># They can be added to the table without rewriting them</span> |
| <a id="__codelineno-54-9" name="__codelineno-54-9" href="#__codelineno-54-9"></a> |
| <a id="__codelineno-54-10" name="__codelineno-54-10" href="#__codelineno-54-10"></a><span class="n">tbl</span><span class="o">.</span><span class="n">add_files</span><span class="p">(</span><span class="n">file_paths</span><span class="o">=</span><span class="n">file_paths</span><span class="p">)</span> |
| <a id="__codelineno-54-11" name="__codelineno-54-11" href="#__codelineno-54-11"></a> |
| <a id="__codelineno-54-12" name="__codelineno-54-12" href="#__codelineno-54-12"></a><span class="c1"># A new snapshot is committed to the table with manifests pointing to the existing parquet files</span> |
| </code></pre></div> |
| <p>Add files to Iceberg table with custom snapshot properties:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-55-1" name="__codelineno-55-1" href="#__codelineno-55-1"></a><span class="c1"># Assume an existing Iceberg table object `tbl`</span> |
| <a id="__codelineno-55-2" name="__codelineno-55-2" href="#__codelineno-55-2"></a> |
| <a id="__codelineno-55-3" name="__codelineno-55-3" href="#__codelineno-55-3"></a><span class="n">file_paths</span> <span class="o">=</span> <span class="p">[</span> |
| <a id="__codelineno-55-4" name="__codelineno-55-4" href="#__codelineno-55-4"></a> <span class="s2">"s3a://warehouse/default/existing-1.parquet"</span><span class="p">,</span> |
| <a id="__codelineno-55-5" name="__codelineno-55-5" href="#__codelineno-55-5"></a> <span class="s2">"s3a://warehouse/default/existing-2.parquet"</span><span class="p">,</span> |
| <a id="__codelineno-55-6" name="__codelineno-55-6" href="#__codelineno-55-6"></a><span class="p">]</span> |
| <a id="__codelineno-55-7" name="__codelineno-55-7" href="#__codelineno-55-7"></a> |
| <a id="__codelineno-55-8" name="__codelineno-55-8" href="#__codelineno-55-8"></a><span class="c1"># Custom snapshot properties</span> |
| <a id="__codelineno-55-9" name="__codelineno-55-9" href="#__codelineno-55-9"></a><span class="n">snapshot_properties</span> <span class="o">=</span> <span class="p">{</span><span class="s2">"abc"</span><span class="p">:</span> <span class="s2">"def"</span><span class="p">}</span> |
| <a id="__codelineno-55-10" name="__codelineno-55-10" href="#__codelineno-55-10"></a> |
| <a id="__codelineno-55-11" name="__codelineno-55-11" href="#__codelineno-55-11"></a><span class="c1"># Enable duplicate file checking</span> |
| <a id="__codelineno-55-12" name="__codelineno-55-12" href="#__codelineno-55-12"></a><span class="n">check_duplicate_files</span> <span class="o">=</span> <span class="kc">True</span> |
| <a id="__codelineno-55-13" name="__codelineno-55-13" href="#__codelineno-55-13"></a> |
| <a id="__codelineno-55-14" name="__codelineno-55-14" href="#__codelineno-55-14"></a><span class="c1"># Add the Parquet files to the Iceberg table without rewriting</span> |
| <a id="__codelineno-55-15" name="__codelineno-55-15" href="#__codelineno-55-15"></a><span class="n">tbl</span><span class="o">.</span><span class="n">add_files</span><span class="p">(</span> |
| <a id="__codelineno-55-16" name="__codelineno-55-16" href="#__codelineno-55-16"></a> <span class="n">file_paths</span><span class="o">=</span><span class="n">file_paths</span><span class="p">,</span> |
| <a id="__codelineno-55-17" name="__codelineno-55-17" href="#__codelineno-55-17"></a> <span class="n">snapshot_properties</span><span class="o">=</span><span class="n">snapshot_properties</span><span class="p">,</span> |
| <a id="__codelineno-55-18" name="__codelineno-55-18" href="#__codelineno-55-18"></a> <span class="n">check_duplicate_files</span><span class="o">=</span><span class="n">check_duplicate_files</span> |
| <a id="__codelineno-55-19" name="__codelineno-55-19" href="#__codelineno-55-19"></a><span class="p">)</span> |
| <a id="__codelineno-55-20" name="__codelineno-55-20" href="#__codelineno-55-20"></a> |
| <a id="__codelineno-55-21" name="__codelineno-55-21" href="#__codelineno-55-21"></a><span class="c1"># NameMapping must have been set to enable reads</span> |
| <a id="__codelineno-55-22" name="__codelineno-55-22" href="#__codelineno-55-22"></a><span class="k">assert</span> <span class="n">tbl</span><span class="o">.</span><span class="n">name_mapping</span><span class="p">()</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span> |
| <a id="__codelineno-55-23" name="__codelineno-55-23" href="#__codelineno-55-23"></a> |
| <a id="__codelineno-55-24" name="__codelineno-55-24" href="#__codelineno-55-24"></a><span class="c1"># Verify that the snapshot property was set correctly</span> |
| <a id="__codelineno-55-25" name="__codelineno-55-25" href="#__codelineno-55-25"></a><span class="k">assert</span> <span class="n">tbl</span><span class="o">.</span><span class="n">metadata</span><span class="o">.</span><span class="n">snapshots</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">summary</span><span class="p">[</span><span class="s2">"abc"</span><span class="p">]</span> <span class="o">==</span> <span class="s2">"def"</span> |
| </code></pre></div> |
| <h2 id="schema-evolution">Schema evolution<a class="headerlink" href="#schema-evolution" title="Permanent link">¶</a></h2> |
| <p>PyIceberg supports full schema evolution through the Python API. It takes care of setting the field-IDs and makes sure that only non-breaking changes are done (can be overridden).</p> |
| <p>In the examples below, the <code>.update_schema()</code> is called from the table itself.</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-56-1" name="__codelineno-56-1" href="#__codelineno-56-1"></a><span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">update_schema</span><span class="p">()</span> <span class="k">as</span> <span class="n">update</span><span class="p">:</span> |
| <a id="__codelineno-56-2" name="__codelineno-56-2" href="#__codelineno-56-2"></a> <span class="n">update</span><span class="o">.</span><span class="n">add_column</span><span class="p">(</span><span class="s2">"some_field"</span><span class="p">,</span> <span class="n">IntegerType</span><span class="p">(),</span> <span class="s2">"doc"</span><span class="p">)</span> |
| </code></pre></div> |
| <p>You can also initiate a transaction if you want to make more changes than just evolving the schema:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-57-1" name="__codelineno-57-1" href="#__codelineno-57-1"></a><span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">transaction</span><span class="p">()</span> <span class="k">as</span> <span class="n">transaction</span><span class="p">:</span> |
| <a id="__codelineno-57-2" name="__codelineno-57-2" href="#__codelineno-57-2"></a> <span class="k">with</span> <span class="n">transaction</span><span class="o">.</span><span class="n">update_schema</span><span class="p">()</span> <span class="k">as</span> <span class="n">update_schema</span><span class="p">:</span> |
| <a id="__codelineno-57-3" name="__codelineno-57-3" href="#__codelineno-57-3"></a> <span class="n">update_schema</span><span class="o">.</span><span class="n">add_column</span><span class="p">(</span><span class="s2">"some_other_field"</span><span class="p">,</span> <span class="n">IntegerType</span><span class="p">(),</span> <span class="s2">"doc"</span><span class="p">)</span> |
| <a id="__codelineno-57-4" name="__codelineno-57-4" href="#__codelineno-57-4"></a> <span class="c1"># ... Update properties etc</span> |
| </code></pre></div> |
| <h3 id="union-by-name">Union by Name<a class="headerlink" href="#union-by-name" title="Permanent link">¶</a></h3> |
| <p>Using <code>.union_by_name()</code> you can merge another schema into an existing schema without having to worry about field-IDs:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-58-1" name="__codelineno-58-1" href="#__codelineno-58-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.catalog</span><span class="w"> </span><span class="kn">import</span> <span class="n">load_catalog</span> |
| <a id="__codelineno-58-2" name="__codelineno-58-2" href="#__codelineno-58-2"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.schema</span><span class="w"> </span><span class="kn">import</span> <span class="n">Schema</span> |
| <a id="__codelineno-58-3" name="__codelineno-58-3" href="#__codelineno-58-3"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.types</span><span class="w"> </span><span class="kn">import</span> <span class="n">NestedField</span><span class="p">,</span> <span class="n">StringType</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">,</span> <span class="n">LongType</span> |
| <a id="__codelineno-58-4" name="__codelineno-58-4" href="#__codelineno-58-4"></a> |
| <a id="__codelineno-58-5" name="__codelineno-58-5" href="#__codelineno-58-5"></a><span class="n">catalog</span> <span class="o">=</span> <span class="n">load_catalog</span><span class="p">()</span> |
| <a id="__codelineno-58-6" name="__codelineno-58-6" href="#__codelineno-58-6"></a> |
| <a id="__codelineno-58-7" name="__codelineno-58-7" href="#__codelineno-58-7"></a><span class="n">schema</span> <span class="o">=</span> <span class="n">Schema</span><span class="p">(</span> |
| <a id="__codelineno-58-8" name="__codelineno-58-8" href="#__codelineno-58-8"></a> <span class="n">NestedField</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="s2">"city"</span><span class="p">,</span> <span class="n">StringType</span><span class="p">(),</span> <span class="n">required</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span> |
| <a id="__codelineno-58-9" name="__codelineno-58-9" href="#__codelineno-58-9"></a> <span class="n">NestedField</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="s2">"lat"</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">(),</span> <span class="n">required</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span> |
| <a id="__codelineno-58-10" name="__codelineno-58-10" href="#__codelineno-58-10"></a> <span class="n">NestedField</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="s2">"long"</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">(),</span> <span class="n">required</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span> |
| <a id="__codelineno-58-11" name="__codelineno-58-11" href="#__codelineno-58-11"></a><span class="p">)</span> |
| <a id="__codelineno-58-12" name="__codelineno-58-12" href="#__codelineno-58-12"></a> |
| <a id="__codelineno-58-13" name="__codelineno-58-13" href="#__codelineno-58-13"></a><span class="n">table</span> <span class="o">=</span> <span class="n">catalog</span><span class="o">.</span><span class="n">create_table</span><span class="p">(</span><span class="s2">"default.locations"</span><span class="p">,</span> <span class="n">schema</span><span class="p">)</span> |
| <a id="__codelineno-58-14" name="__codelineno-58-14" href="#__codelineno-58-14"></a> |
| <a id="__codelineno-58-15" name="__codelineno-58-15" href="#__codelineno-58-15"></a><span class="n">new_schema</span> <span class="o">=</span> <span class="n">Schema</span><span class="p">(</span> |
| <a id="__codelineno-58-16" name="__codelineno-58-16" href="#__codelineno-58-16"></a> <span class="n">NestedField</span><span class="p">(</span><span class="mi">1</span><span class="p">,</span> <span class="s2">"city"</span><span class="p">,</span> <span class="n">StringType</span><span class="p">(),</span> <span class="n">required</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span> |
| <a id="__codelineno-58-17" name="__codelineno-58-17" href="#__codelineno-58-17"></a> <span class="n">NestedField</span><span class="p">(</span><span class="mi">2</span><span class="p">,</span> <span class="s2">"lat"</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">(),</span> <span class="n">required</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span> |
| <a id="__codelineno-58-18" name="__codelineno-58-18" href="#__codelineno-58-18"></a> <span class="n">NestedField</span><span class="p">(</span><span class="mi">3</span><span class="p">,</span> <span class="s2">"long"</span><span class="p">,</span> <span class="n">DoubleType</span><span class="p">(),</span> <span class="n">required</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span> |
| <a id="__codelineno-58-19" name="__codelineno-58-19" href="#__codelineno-58-19"></a> <span class="n">NestedField</span><span class="p">(</span><span class="mi">10</span><span class="p">,</span> <span class="s2">"population"</span><span class="p">,</span> <span class="n">LongType</span><span class="p">(),</span> <span class="n">required</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span> |
| <a id="__codelineno-58-20" name="__codelineno-58-20" href="#__codelineno-58-20"></a><span class="p">)</span> |
| <a id="__codelineno-58-21" name="__codelineno-58-21" href="#__codelineno-58-21"></a> |
| <a id="__codelineno-58-22" name="__codelineno-58-22" href="#__codelineno-58-22"></a><span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">update_schema</span><span class="p">()</span> <span class="k">as</span> <span class="n">update</span><span class="p">:</span> |
| <a id="__codelineno-58-23" name="__codelineno-58-23" href="#__codelineno-58-23"></a> <span class="n">update</span><span class="o">.</span><span class="n">union_by_name</span><span class="p">(</span><span class="n">new_schema</span><span class="p">)</span> |
| </code></pre></div> |
| <p>Now the table has the union of the two schemas, as shown by <code>print(table.schema())</code>:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-59-1" name="__codelineno-59-1" href="#__codelineno-59-1"></a><span class="n">table</span> <span class="p">{</span> |
| <a id="__codelineno-59-2" name="__codelineno-59-2" href="#__codelineno-59-2"></a> <span class="mi">1</span><span class="p">:</span> <span class="n">city</span><span class="p">:</span> <span class="n">optional</span> <span class="n">string</span> |
| <a id="__codelineno-59-3" name="__codelineno-59-3" href="#__codelineno-59-3"></a> <span class="mi">2</span><span class="p">:</span> <span class="n">lat</span><span class="p">:</span> <span class="n">optional</span> <span class="n">double</span> |
| <a id="__codelineno-59-4" name="__codelineno-59-4" href="#__codelineno-59-4"></a> <span class="mi">3</span><span class="p">:</span> <span class="n">long</span><span class="p">:</span> <span class="n">optional</span> <span class="n">double</span> |
| <a id="__codelineno-59-5" name="__codelineno-59-5" href="#__codelineno-59-5"></a> <span class="mi">4</span><span class="p">:</span> <span class="n">population</span><span class="p">:</span> <span class="n">optional</span> <span class="n">long</span> |
| <a id="__codelineno-59-6" name="__codelineno-59-6" href="#__codelineno-59-6"></a><span class="p">}</span> |
| </code></pre></div> |
| <h3 id="add-column">Add column<a class="headerlink" href="#add-column" title="Permanent link">¶</a></h3> |
| <p>Using <code>add_column</code> you can add a column, without having to worry about the field-id:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-60-1" name="__codelineno-60-1" href="#__codelineno-60-1"></a><span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">update_schema</span><span class="p">()</span> <span class="k">as</span> <span class="n">update</span><span class="p">:</span> |
| <a id="__codelineno-60-2" name="__codelineno-60-2" href="#__codelineno-60-2"></a> <span class="n">update</span><span class="o">.</span><span class="n">add_column</span><span class="p">(</span><span class="s2">"retries"</span><span class="p">,</span> <span class="n">IntegerType</span><span class="p">(),</span> <span class="s2">"Number of retries to place the bid"</span><span class="p">)</span> |
| <a id="__codelineno-60-3" name="__codelineno-60-3" href="#__codelineno-60-3"></a> <span class="c1"># In a struct</span> |
| <a id="__codelineno-60-4" name="__codelineno-60-4" href="#__codelineno-60-4"></a> <span class="n">update</span><span class="o">.</span><span class="n">add_column</span><span class="p">(</span><span class="s2">"details"</span><span class="p">,</span> <span class="n">StructType</span><span class="p">())</span> |
| <a id="__codelineno-60-5" name="__codelineno-60-5" href="#__codelineno-60-5"></a> |
| <a id="__codelineno-60-6" name="__codelineno-60-6" href="#__codelineno-60-6"></a><span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">update_schema</span><span class="p">()</span> <span class="k">as</span> <span class="n">update</span><span class="p">:</span> |
| <a id="__codelineno-60-7" name="__codelineno-60-7" href="#__codelineno-60-7"></a> <span class="n">update</span><span class="o">.</span><span class="n">add_column</span><span class="p">((</span><span class="s2">"details"</span><span class="p">,</span> <span class="s2">"confirmed_by"</span><span class="p">),</span> <span class="n">StringType</span><span class="p">(),</span> <span class="s2">"Name of the exchange"</span><span class="p">)</span> |
| </code></pre></div> |
| <p>A complex type must exist before columns can be added to it. Fields in complex types are added in a tuple.</p> |
| <h3 id="rename-column">Rename column<a class="headerlink" href="#rename-column" title="Permanent link">¶</a></h3> |
| <p>Renaming a field in an Iceberg table is simple:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-61-1" name="__codelineno-61-1" href="#__codelineno-61-1"></a><span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">update_schema</span><span class="p">()</span> <span class="k">as</span> <span class="n">update</span><span class="p">:</span> |
| <a id="__codelineno-61-2" name="__codelineno-61-2" href="#__codelineno-61-2"></a> <span class="n">update</span><span class="o">.</span><span class="n">rename_column</span><span class="p">(</span><span class="s2">"retries"</span><span class="p">,</span> <span class="s2">"num_retries"</span><span class="p">)</span> |
| <a id="__codelineno-61-3" name="__codelineno-61-3" href="#__codelineno-61-3"></a> <span class="c1"># This will rename `confirmed_by` to `processed_by` in the `details` struct</span> |
| <a id="__codelineno-61-4" name="__codelineno-61-4" href="#__codelineno-61-4"></a> <span class="n">update</span><span class="o">.</span><span class="n">rename_column</span><span class="p">((</span><span class="s2">"details"</span><span class="p">,</span> <span class="s2">"confirmed_by"</span><span class="p">),</span> <span class="s2">"processed_by"</span><span class="p">)</span> |
| </code></pre></div> |
| <h3 id="move-column">Move column<a class="headerlink" href="#move-column" title="Permanent link">¶</a></h3> |
| <p>Change the order of fields:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-62-1" name="__codelineno-62-1" href="#__codelineno-62-1"></a><span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">update_schema</span><span class="p">()</span> <span class="k">as</span> <span class="n">update</span><span class="p">:</span> |
| <a id="__codelineno-62-2" name="__codelineno-62-2" href="#__codelineno-62-2"></a> <span class="n">update</span><span class="o">.</span><span class="n">move_first</span><span class="p">(</span><span class="s2">"symbol"</span><span class="p">)</span> |
| <a id="__codelineno-62-3" name="__codelineno-62-3" href="#__codelineno-62-3"></a> <span class="c1"># This will move `bid` after `ask`</span> |
| <a id="__codelineno-62-4" name="__codelineno-62-4" href="#__codelineno-62-4"></a> <span class="n">update</span><span class="o">.</span><span class="n">move_after</span><span class="p">(</span><span class="s2">"bid"</span><span class="p">,</span> <span class="s2">"ask"</span><span class="p">)</span> |
| <a id="__codelineno-62-5" name="__codelineno-62-5" href="#__codelineno-62-5"></a> <span class="c1"># This will move `confirmed_by` before `exchange` in the `details` struct</span> |
| <a id="__codelineno-62-6" name="__codelineno-62-6" href="#__codelineno-62-6"></a> <span class="n">update</span><span class="o">.</span><span class="n">move_before</span><span class="p">((</span><span class="s2">"details"</span><span class="p">,</span> <span class="s2">"confirmed_by"</span><span class="p">),</span> <span class="p">(</span><span class="s2">"details"</span><span class="p">,</span> <span class="s2">"exchange"</span><span class="p">))</span> |
| </code></pre></div> |
| <h3 id="update-column">Update column<a class="headerlink" href="#update-column" title="Permanent link">¶</a></h3> |
| <p>Update a field's type, description, or required flag.</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-63-1" name="__codelineno-63-1" href="#__codelineno-63-1"></a><span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">update_schema</span><span class="p">()</span> <span class="k">as</span> <span class="n">update</span><span class="p">:</span> |
| <a id="__codelineno-63-2" name="__codelineno-63-2" href="#__codelineno-63-2"></a> <span class="c1"># Promote a float to a double</span> |
| <a id="__codelineno-63-3" name="__codelineno-63-3" href="#__codelineno-63-3"></a> <span class="n">update</span><span class="o">.</span><span class="n">update_column</span><span class="p">(</span><span class="s2">"bid"</span><span class="p">,</span> <span class="n">field_type</span><span class="o">=</span><span class="n">DoubleType</span><span class="p">())</span> |
| <a id="__codelineno-63-4" name="__codelineno-63-4" href="#__codelineno-63-4"></a> <span class="c1"># Make a field optional</span> |
| <a id="__codelineno-63-5" name="__codelineno-63-5" href="#__codelineno-63-5"></a> <span class="n">update</span><span class="o">.</span><span class="n">update_column</span><span class="p">(</span><span class="s2">"symbol"</span><span class="p">,</span> <span class="n">required</span><span class="o">=</span><span class="kc">False</span><span class="p">)</span> |
| <a id="__codelineno-63-6" name="__codelineno-63-6" href="#__codelineno-63-6"></a> <span class="c1"># Update the documentation</span> |
| <a id="__codelineno-63-7" name="__codelineno-63-7" href="#__codelineno-63-7"></a> <span class="n">update</span><span class="o">.</span><span class="n">update_column</span><span class="p">(</span><span class="s2">"symbol"</span><span class="p">,</span> <span class="n">doc</span><span class="o">=</span><span class="s2">"Name of the share on the exchange"</span><span class="p">)</span> |
| </code></pre></div> |
| <p>Be careful: some operations are not compatible, but they can still be done at your own risk by setting <code>allow_incompatible_changes=True</code>:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-64-1" name="__codelineno-64-1" href="#__codelineno-64-1"></a><span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">update_schema</span><span class="p">(</span><span class="n">allow_incompatible_changes</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> <span class="k">as</span> <span class="n">update</span><span class="p">:</span> |
| <a id="__codelineno-64-2" name="__codelineno-64-2" href="#__codelineno-64-2"></a> <span class="c1"># Incompatible change, cannot require an optional field</span> |
| <a id="__codelineno-64-3" name="__codelineno-64-3" href="#__codelineno-64-3"></a> <span class="n">update</span><span class="o">.</span><span class="n">update_column</span><span class="p">(</span><span class="s2">"symbol"</span><span class="p">,</span> <span class="n">required</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> |
| </code></pre></div> |
| <h3 id="delete-column">Delete column<a class="headerlink" href="#delete-column" title="Permanent link">¶</a></h3> |
| <p>Delete a field. Be careful: this is an incompatible change (readers/writers might expect this field):</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-65-1" name="__codelineno-65-1" href="#__codelineno-65-1"></a><span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">update_schema</span><span class="p">(</span><span class="n">allow_incompatible_changes</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> <span class="k">as</span> <span class="n">update</span><span class="p">:</span> |
| <a id="__codelineno-65-2" name="__codelineno-65-2" href="#__codelineno-65-2"></a> <span class="n">update</span><span class="o">.</span><span class="n">delete_column</span><span class="p">(</span><span class="s2">"some_field"</span><span class="p">)</span> |
| <a id="__codelineno-65-3" name="__codelineno-65-3" href="#__codelineno-65-3"></a> <span class="c1"># In a struct</span> |
| <a id="__codelineno-65-4" name="__codelineno-65-4" href="#__codelineno-65-4"></a> <span class="n">update</span><span class="o">.</span><span class="n">delete_column</span><span class="p">((</span><span class="s2">"details"</span><span class="p">,</span> <span class="s2">"confirmed_by"</span><span class="p">))</span> |
| </code></pre></div> |
| <h2 id="partition-evolution">Partition evolution<a class="headerlink" href="#partition-evolution" title="Permanent link">¶</a></h2> |
| <p>PyIceberg supports partition evolution. See the <a href="https://iceberg.apache.org/spec/#partition-evolution">partition evolution</a> |
| section of the Iceberg table spec for more details.</p> |
| <p>The API to use when evolving partitions is the <code>update_spec</code> API on the table.</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-66-1" name="__codelineno-66-1" href="#__codelineno-66-1"></a><span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">update_spec</span><span class="p">()</span> <span class="k">as</span> <span class="n">update</span><span class="p">:</span> |
| <a id="__codelineno-66-2" name="__codelineno-66-2" href="#__codelineno-66-2"></a> <span class="n">update</span><span class="o">.</span><span class="n">add_field</span><span class="p">(</span><span class="s2">"id"</span><span class="p">,</span> <span class="n">BucketTransform</span><span class="p">(</span><span class="mi">16</span><span class="p">),</span> <span class="s2">"bucketed_id"</span><span class="p">)</span> |
| <a id="__codelineno-66-3" name="__codelineno-66-3" href="#__codelineno-66-3"></a> <span class="n">update</span><span class="o">.</span><span class="n">add_field</span><span class="p">(</span><span class="s2">"event_ts"</span><span class="p">,</span> <span class="n">DayTransform</span><span class="p">(),</span> <span class="s2">"day_ts"</span><span class="p">)</span> |
| </code></pre></div> |
| <p>Updating the partition spec can also be done as part of a transaction with other operations.</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-67-1" name="__codelineno-67-1" href="#__codelineno-67-1"></a><span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">transaction</span><span class="p">()</span> <span class="k">as</span> <span class="n">transaction</span><span class="p">:</span> |
| <a id="__codelineno-67-2" name="__codelineno-67-2" href="#__codelineno-67-2"></a> <span class="k">with</span> <span class="n">transaction</span><span class="o">.</span><span class="n">update_spec</span><span class="p">()</span> <span class="k">as</span> <span class="n">update_spec</span><span class="p">:</span> |
| <a id="__codelineno-67-3" name="__codelineno-67-3" href="#__codelineno-67-3"></a> <span class="n">update_spec</span><span class="o">.</span><span class="n">add_field</span><span class="p">(</span><span class="s2">"id"</span><span class="p">,</span> <span class="n">BucketTransform</span><span class="p">(</span><span class="mi">16</span><span class="p">),</span> <span class="s2">"bucketed_id"</span><span class="p">)</span> |
| <a id="__codelineno-67-4" name="__codelineno-67-4" href="#__codelineno-67-4"></a> <span class="n">update_spec</span><span class="o">.</span><span class="n">add_field</span><span class="p">(</span><span class="s2">"event_ts"</span><span class="p">,</span> <span class="n">DayTransform</span><span class="p">(),</span> <span class="s2">"day_ts"</span><span class="p">)</span> |
| <a id="__codelineno-67-5" name="__codelineno-67-5" href="#__codelineno-67-5"></a> <span class="c1"># ... Update properties etc</span> |
| </code></pre></div> |
| <h3 id="add-fields">Add fields<a class="headerlink" href="#add-fields" title="Permanent link">¶</a></h3> |
| <p>New partition fields can be added via the <code>add_field</code> API, which takes the field name to partition on, |
| the partition transform, and an optional partition name. If the partition name is not specified, |
| one will be generated automatically.</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-68-1" name="__codelineno-68-1" href="#__codelineno-68-1"></a><span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">update_spec</span><span class="p">()</span> <span class="k">as</span> <span class="n">update</span><span class="p">:</span> |
| <a id="__codelineno-68-2" name="__codelineno-68-2" href="#__codelineno-68-2"></a> <span class="n">update</span><span class="o">.</span><span class="n">add_field</span><span class="p">(</span><span class="s2">"id"</span><span class="p">,</span> <span class="n">BucketTransform</span><span class="p">(</span><span class="mi">16</span><span class="p">),</span> <span class="s2">"bucketed_id"</span><span class="p">)</span> |
| <a id="__codelineno-68-3" name="__codelineno-68-3" href="#__codelineno-68-3"></a> <span class="n">update</span><span class="o">.</span><span class="n">add_field</span><span class="p">(</span><span class="s2">"event_ts"</span><span class="p">,</span> <span class="n">DayTransform</span><span class="p">(),</span> <span class="s2">"day_ts"</span><span class="p">)</span> |
| <a id="__codelineno-68-4" name="__codelineno-68-4" href="#__codelineno-68-4"></a> <span class="c1"># identity is a shortcut API for adding an IdentityTransform</span> |
| <a id="__codelineno-68-5" name="__codelineno-68-5" href="#__codelineno-68-5"></a> <span class="n">update</span><span class="o">.</span><span class="n">identity</span><span class="p">(</span><span class="s2">"some_field"</span><span class="p">)</span> |
| </code></pre></div> |
| <h3 id="remove-fields">Remove fields<a class="headerlink" href="#remove-fields" title="Permanent link">¶</a></h3> |
| <p>Partition fields can also be removed via the <code>remove_field</code> API if it no longer makes sense to partition on those fields.</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-69-1" name="__codelineno-69-1" href="#__codelineno-69-1"></a><span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">update_spec</span><span class="p">()</span> <span class="k">as</span> <span class="n">update</span><span class="p">:</span> |
| <a id="__codelineno-69-2" name="__codelineno-69-2" href="#__codelineno-69-2"></a> <span class="c1"># Remove the partition field with the name</span> |
| <a id="__codelineno-69-3" name="__codelineno-69-3" href="#__codelineno-69-3"></a> <span class="n">update</span><span class="o">.</span><span class="n">remove_field</span><span class="p">(</span><span class="s2">"some_partition_name"</span><span class="p">)</span> |
| </code></pre></div> |
| <h3 id="rename-fields">Rename fields<a class="headerlink" href="#rename-fields" title="Permanent link">¶</a></h3> |
| <p>Partition fields can also be renamed via the <code>rename_field</code> API.</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-70-1" name="__codelineno-70-1" href="#__codelineno-70-1"></a><span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">update_spec</span><span class="p">()</span> <span class="k">as</span> <span class="n">update</span><span class="p">:</span> |
| <a id="__codelineno-70-2" name="__codelineno-70-2" href="#__codelineno-70-2"></a> <span class="c1"># Rename the partition field with the name bucketed_id to sharded_id</span> |
| <a id="__codelineno-70-3" name="__codelineno-70-3" href="#__codelineno-70-3"></a> <span class="n">update</span><span class="o">.</span><span class="n">rename_field</span><span class="p">(</span><span class="s2">"bucketed_id"</span><span class="p">,</span> <span class="s2">"sharded_id"</span><span class="p">)</span> |
| </code></pre></div> |
| <h2 id="sort-order-updates">Sort order updates<a class="headerlink" href="#sort-order-updates" title="Permanent link">¶</a></h2> |
| <p>Users can update the sort order on existing tables for new data. See <a href="https://iceberg.apache.org/spec/#sorting">sorting</a> for more details.</p> |
| <p>The API to use when updating a sort order is the <code>update_sort_order</code> API on the table.</p> |
| <p>Sort orders can only be updated by adding a new sort order. They cannot be deleted or modified.</p> |
| <h3 id="updating-a-sort-order-on-a-table">Updating a sort order on a table<a class="headerlink" href="#updating-a-sort-order-on-a-table" title="Permanent link">¶</a></h3> |
| <p>To create a new sort order, you can use either the <code>asc</code> or <code>desc</code> API depending on whether you want your data sorted in ascending or descending order. Both take the name of the field, the sort order transform, and a null order that describes the order of null values when sorted.</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-71-1" name="__codelineno-71-1" href="#__codelineno-71-1"></a><span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">update_sort_order</span><span class="p">()</span> <span class="k">as</span> <span class="n">update</span><span class="p">:</span> |
| <a id="__codelineno-71-2" name="__codelineno-71-2" href="#__codelineno-71-2"></a> <span class="n">update</span><span class="o">.</span><span class="n">desc</span><span class="p">(</span><span class="s2">"event_ts"</span><span class="p">,</span> <span class="n">DayTransform</span><span class="p">(),</span> <span class="n">NullOrder</span><span class="o">.</span><span class="n">NULLS_FIRST</span><span class="p">)</span> |
| <a id="__codelineno-71-3" name="__codelineno-71-3" href="#__codelineno-71-3"></a> <span class="n">update</span><span class="o">.</span><span class="n">asc</span><span class="p">(</span><span class="s2">"some_field"</span><span class="p">,</span> <span class="n">IdentityTransform</span><span class="p">(),</span> <span class="n">NullOrder</span><span class="o">.</span><span class="n">NULLS_LAST</span><span class="p">)</span> |
| </code></pre></div> |
| <h2 id="table-properties">Table properties<a class="headerlink" href="#table-properties" title="Permanent link">¶</a></h2> |
| <p>Set and remove properties through the <code>Transaction</code> API:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-72-1" name="__codelineno-72-1" href="#__codelineno-72-1"></a><span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">transaction</span><span class="p">()</span> <span class="k">as</span> <span class="n">transaction</span><span class="p">:</span> |
| <a id="__codelineno-72-2" name="__codelineno-72-2" href="#__codelineno-72-2"></a> <span class="n">transaction</span><span class="o">.</span><span class="n">set_properties</span><span class="p">(</span><span class="n">abc</span><span class="o">=</span><span class="s2">"def"</span><span class="p">)</span> |
| <a id="__codelineno-72-3" name="__codelineno-72-3" href="#__codelineno-72-3"></a> |
| <a id="__codelineno-72-4" name="__codelineno-72-4" href="#__codelineno-72-4"></a><span class="k">assert</span> <span class="n">table</span><span class="o">.</span><span class="n">properties</span> <span class="o">==</span> <span class="p">{</span><span class="s2">"abc"</span><span class="p">:</span> <span class="s2">"def"</span><span class="p">}</span> |
| <a id="__codelineno-72-5" name="__codelineno-72-5" href="#__codelineno-72-5"></a> |
| <a id="__codelineno-72-6" name="__codelineno-72-6" href="#__codelineno-72-6"></a><span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">transaction</span><span class="p">()</span> <span class="k">as</span> <span class="n">transaction</span><span class="p">:</span> |
| <a id="__codelineno-72-7" name="__codelineno-72-7" href="#__codelineno-72-7"></a> <span class="n">transaction</span><span class="o">.</span><span class="n">remove_properties</span><span class="p">(</span><span class="s2">"abc"</span><span class="p">)</span> |
| <a id="__codelineno-72-8" name="__codelineno-72-8" href="#__codelineno-72-8"></a> |
| <a id="__codelineno-72-9" name="__codelineno-72-9" href="#__codelineno-72-9"></a><span class="k">assert</span> <span class="n">table</span><span class="o">.</span><span class="n">properties</span> <span class="o">==</span> <span class="p">{}</span> |
| </code></pre></div> |
| <p>Or, without a context manager:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-73-1" name="__codelineno-73-1" href="#__codelineno-73-1"></a><span class="n">table</span> <span class="o">=</span> <span class="n">table</span><span class="o">.</span><span class="n">transaction</span><span class="p">()</span><span class="o">.</span><span class="n">set_properties</span><span class="p">(</span><span class="n">abc</span><span class="o">=</span><span class="s2">"def"</span><span class="p">)</span><span class="o">.</span><span class="n">commit_transaction</span><span class="p">()</span> |
| <a id="__codelineno-73-2" name="__codelineno-73-2" href="#__codelineno-73-2"></a> |
| <a id="__codelineno-73-3" name="__codelineno-73-3" href="#__codelineno-73-3"></a><span class="k">assert</span> <span class="n">table</span><span class="o">.</span><span class="n">properties</span> <span class="o">==</span> <span class="p">{</span><span class="s2">"abc"</span><span class="p">:</span> <span class="s2">"def"</span><span class="p">}</span> |
| <a id="__codelineno-73-4" name="__codelineno-73-4" href="#__codelineno-73-4"></a> |
| <a id="__codelineno-73-5" name="__codelineno-73-5" href="#__codelineno-73-5"></a><span class="n">table</span> <span class="o">=</span> <span class="n">table</span><span class="o">.</span><span class="n">transaction</span><span class="p">()</span><span class="o">.</span><span class="n">remove_properties</span><span class="p">(</span><span class="s2">"abc"</span><span class="p">)</span><span class="o">.</span><span class="n">commit_transaction</span><span class="p">()</span> |
| <a id="__codelineno-73-6" name="__codelineno-73-6" href="#__codelineno-73-6"></a> |
| <a id="__codelineno-73-7" name="__codelineno-73-7" href="#__codelineno-73-7"></a><span class="k">assert</span> <span class="n">table</span><span class="o">.</span><span class="n">properties</span> <span class="o">==</span> <span class="p">{}</span> |
| </code></pre></div> |
| <h2 id="snapshot-properties">Snapshot properties<a class="headerlink" href="#snapshot-properties" title="Permanent link">¶</a></h2> |
| <p>Optionally, snapshot properties can be set while writing to a table using the <code>append</code> or <code>overwrite</code> API:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-74-1" name="__codelineno-74-1" href="#__codelineno-74-1"></a><span class="n">tbl</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">snapshot_properties</span><span class="o">=</span><span class="p">{</span><span class="s2">"abc"</span><span class="p">:</span> <span class="s2">"def"</span><span class="p">})</span> |
| <a id="__codelineno-74-2" name="__codelineno-74-2" href="#__codelineno-74-2"></a> |
| <a id="__codelineno-74-3" name="__codelineno-74-3" href="#__codelineno-74-3"></a><span class="c1"># or</span> |
| <a id="__codelineno-74-4" name="__codelineno-74-4" href="#__codelineno-74-4"></a> |
| <a id="__codelineno-74-5" name="__codelineno-74-5" href="#__codelineno-74-5"></a><span class="n">tbl</span><span class="o">.</span><span class="n">overwrite</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">snapshot_properties</span><span class="o">=</span><span class="p">{</span><span class="s2">"abc"</span><span class="p">:</span> <span class="s2">"def"</span><span class="p">})</span> |
| <a id="__codelineno-74-6" name="__codelineno-74-6" href="#__codelineno-74-6"></a> |
| <a id="__codelineno-74-7" name="__codelineno-74-7" href="#__codelineno-74-7"></a><span class="k">assert</span> <span class="n">tbl</span><span class="o">.</span><span class="n">metadata</span><span class="o">.</span><span class="n">snapshots</span><span class="p">[</span><span class="o">-</span><span class="mi">1</span><span class="p">]</span><span class="o">.</span><span class="n">summary</span><span class="p">[</span><span class="s2">"abc"</span><span class="p">]</span> <span class="o">==</span> <span class="s2">"def"</span> |
| </code></pre></div> |
| <h2 id="snapshot-management">Snapshot Management<a class="headerlink" href="#snapshot-management" title="Permanent link">¶</a></h2> |
| <p>Manage snapshots with operations through the <code>Table</code> API:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-75-1" name="__codelineno-75-1" href="#__codelineno-75-1"></a><span class="c1"># To run a specific operation</span> |
| <a id="__codelineno-75-2" name="__codelineno-75-2" href="#__codelineno-75-2"></a><span class="n">table</span><span class="o">.</span><span class="n">manage_snapshots</span><span class="p">()</span><span class="o">.</span><span class="n">create_tag</span><span class="p">(</span><span class="n">snapshot_id</span><span class="p">,</span> <span class="s2">"tag123"</span><span class="p">)</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span> |
| <a id="__codelineno-75-3" name="__codelineno-75-3" href="#__codelineno-75-3"></a><span class="c1"># To run multiple operations</span> |
| <a id="__codelineno-75-4" name="__codelineno-75-4" href="#__codelineno-75-4"></a><span class="p">(</span><span class="n">table</span><span class="o">.</span><span class="n">manage_snapshots</span><span class="p">()</span> |
| <a id="__codelineno-75-5" name="__codelineno-75-5" href="#__codelineno-75-5"></a> <span class="o">.</span><span class="n">create_tag</span><span class="p">(</span><span class="n">snapshot_id1</span><span class="p">,</span> <span class="s2">"tag123"</span><span class="p">)</span> |
| <a id="__codelineno-75-6" name="__codelineno-75-6" href="#__codelineno-75-6"></a> <span class="o">.</span><span class="n">create_tag</span><span class="p">(</span><span class="n">snapshot_id2</span><span class="p">,</span> <span class="s2">"tag456"</span><span class="p">)</span> |
| <a id="__codelineno-75-7" name="__codelineno-75-7" href="#__codelineno-75-7"></a> <span class="o">.</span><span class="n">commit</span><span class="p">())</span> |
| <a id="__codelineno-75-8" name="__codelineno-75-8" href="#__codelineno-75-8"></a><span class="c1"># Operations are applied on commit.</span> |
| </code></pre></div> |
| <p>You can also use context managers to make more changes:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-76-1" name="__codelineno-76-1" href="#__codelineno-76-1"></a><span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">manage_snapshots</span><span class="p">()</span> <span class="k">as</span> <span class="n">ms</span><span class="p">:</span> |
| <a id="__codelineno-76-2" name="__codelineno-76-2" href="#__codelineno-76-2"></a> <span class="n">ms</span><span class="o">.</span><span class="n">create_branch</span><span class="p">(</span><span class="n">snapshot_id1</span><span class="p">,</span> <span class="s2">"Branch_A"</span><span class="p">)</span><span class="o">.</span><span class="n">create_tag</span><span class="p">(</span><span class="n">snapshot_id2</span><span class="p">,</span> <span class="s2">"tag789"</span><span class="p">)</span> |
| </code></pre></div> |
| <h3 id="tags">Tags<a class="headerlink" href="#tags" title="Permanent link">¶</a></h3> |
| <p>Tags are immutable named references to snapshots. They can be used to mark important snapshots for long-term retention or to reference specific table versions.</p> |
| <p>Create a tag pointing to a specific snapshot:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-77-1" name="__codelineno-77-1" href="#__codelineno-77-1"></a><span class="c1"># Create a tag with default retention</span> |
| <a id="__codelineno-77-2" name="__codelineno-77-2" href="#__codelineno-77-2"></a><span class="n">table</span><span class="o">.</span><span class="n">manage_snapshots</span><span class="p">()</span><span class="o">.</span><span class="n">create_tag</span><span class="p">(</span> |
| <a id="__codelineno-77-3" name="__codelineno-77-3" href="#__codelineno-77-3"></a> <span class="n">snapshot_id</span><span class="o">=</span><span class="n">snapshot_id</span><span class="p">,</span> |
| <a id="__codelineno-77-4" name="__codelineno-77-4" href="#__codelineno-77-4"></a> <span class="n">tag_name</span><span class="o">=</span><span class="s2">"v1.0.0"</span> |
| <a id="__codelineno-77-5" name="__codelineno-77-5" href="#__codelineno-77-5"></a><span class="p">)</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span> |
| <a id="__codelineno-77-6" name="__codelineno-77-6" href="#__codelineno-77-6"></a> |
| <a id="__codelineno-77-7" name="__codelineno-77-7" href="#__codelineno-77-7"></a><span class="c1"># Create a tag with custom max reference age</span> |
| <a id="__codelineno-77-8" name="__codelineno-77-8" href="#__codelineno-77-8"></a><span class="n">table</span><span class="o">.</span><span class="n">manage_snapshots</span><span class="p">()</span><span class="o">.</span><span class="n">create_tag</span><span class="p">(</span> |
| <a id="__codelineno-77-9" name="__codelineno-77-9" href="#__codelineno-77-9"></a> <span class="n">snapshot_id</span><span class="o">=</span><span class="n">snapshot_id</span><span class="p">,</span> |
| <a id="__codelineno-77-10" name="__codelineno-77-10" href="#__codelineno-77-10"></a> <span class="n">tag_name</span><span class="o">=</span><span class="s2">"v1.0.0"</span><span class="p">,</span> |
| <a id="__codelineno-77-11" name="__codelineno-77-11" href="#__codelineno-77-11"></a> <span class="n">max_ref_age_ms</span><span class="o">=</span><span class="mi">604800000</span> <span class="c1"># 7 days</span> |
| <a id="__codelineno-77-12" name="__codelineno-77-12" href="#__codelineno-77-12"></a><span class="p">)</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span> |
| </code></pre></div> |
| <p>Remove an existing tag:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-78-1" name="__codelineno-78-1" href="#__codelineno-78-1"></a><span class="n">table</span><span class="o">.</span><span class="n">manage_snapshots</span><span class="p">()</span><span class="o">.</span><span class="n">remove_tag</span><span class="p">(</span><span class="s2">"v1.0.0"</span><span class="p">)</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span> |
| </code></pre></div> |
| <h3 id="branching">Branching<a class="headerlink" href="#branching" title="Permanent link">¶</a></h3> |
| <p>Branches are mutable named references to snapshots that can be updated over time. They allow for independent lineages of table changes, enabling use cases like development branches, testing environments, or parallel workflows.</p> |
| <p>Create a branch pointing to a specific snapshot:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-79-1" name="__codelineno-79-1" href="#__codelineno-79-1"></a><span class="c1"># Create a branch with default settings</span> |
| <a id="__codelineno-79-2" name="__codelineno-79-2" href="#__codelineno-79-2"></a><span class="n">table</span><span class="o">.</span><span class="n">manage_snapshots</span><span class="p">()</span><span class="o">.</span><span class="n">create_branch</span><span class="p">(</span> |
| <a id="__codelineno-79-3" name="__codelineno-79-3" href="#__codelineno-79-3"></a> <span class="n">snapshot_id</span><span class="o">=</span><span class="n">snapshot_id</span><span class="p">,</span> |
| <a id="__codelineno-79-4" name="__codelineno-79-4" href="#__codelineno-79-4"></a> <span class="n">branch_name</span><span class="o">=</span><span class="s2">"dev"</span> |
| <a id="__codelineno-79-5" name="__codelineno-79-5" href="#__codelineno-79-5"></a><span class="p">)</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span> |
| <a id="__codelineno-79-6" name="__codelineno-79-6" href="#__codelineno-79-6"></a> |
| <a id="__codelineno-79-7" name="__codelineno-79-7" href="#__codelineno-79-7"></a><span class="c1"># Create a branch with retention policies</span> |
| <a id="__codelineno-79-8" name="__codelineno-79-8" href="#__codelineno-79-8"></a><span class="n">table</span><span class="o">.</span><span class="n">manage_snapshots</span><span class="p">()</span><span class="o">.</span><span class="n">create_branch</span><span class="p">(</span> |
| <a id="__codelineno-79-9" name="__codelineno-79-9" href="#__codelineno-79-9"></a> <span class="n">snapshot_id</span><span class="o">=</span><span class="n">snapshot_id</span><span class="p">,</span> |
| <a id="__codelineno-79-10" name="__codelineno-79-10" href="#__codelineno-79-10"></a> <span class="n">branch_name</span><span class="o">=</span><span class="s2">"dev"</span><span class="p">,</span> |
| <a id="__codelineno-79-11" name="__codelineno-79-11" href="#__codelineno-79-11"></a> <span class="n">max_ref_age_ms</span><span class="o">=</span><span class="mi">604800000</span><span class="p">,</span> <span class="c1"># Max age of the branch reference (7 days)</span> |
| <a id="__codelineno-79-12" name="__codelineno-79-12" href="#__codelineno-79-12"></a> <span class="n">max_snapshot_age_ms</span><span class="o">=</span><span class="mi">259200000</span><span class="p">,</span> <span class="c1"># Max age of snapshots to keep (3 days)</span> |
| <a id="__codelineno-79-13" name="__codelineno-79-13" href="#__codelineno-79-13"></a> <span class="n">min_snapshots_to_keep</span><span class="o">=</span><span class="mi">10</span> <span class="c1"># Minimum number of snapshots to retain</span> |
| <a id="__codelineno-79-14" name="__codelineno-79-14" href="#__codelineno-79-14"></a><span class="p">)</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span> |
| </code></pre></div> |
| <p>Remove an existing branch:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-80-1" name="__codelineno-80-1" href="#__codelineno-80-1"></a><span class="n">table</span><span class="o">.</span><span class="n">manage_snapshots</span><span class="p">()</span><span class="o">.</span><span class="n">remove_branch</span><span class="p">(</span><span class="s2">"dev"</span><span class="p">)</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span> |
| </code></pre></div> |
| <h2 id="table-maintenance">Table Maintenance<a class="headerlink" href="#table-maintenance" title="Permanent link">¶</a></h2> |
| <p>PyIceberg provides table maintenance operations through the <code>table.maintenance</code> API. This provides a clean interface for performing maintenance tasks like snapshot expiration.</p> |
| <h3 id="snapshot-expiration">Snapshot Expiration<a class="headerlink" href="#snapshot-expiration" title="Permanent link">¶</a></h3> |
| <p>Expire old snapshots to clean up table metadata and reduce storage costs:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-81-1" name="__codelineno-81-1" href="#__codelineno-81-1"></a><span class="c1"># Expire snapshots older than three days</span> |
| <a id="__codelineno-81-2" name="__codelineno-81-2" href="#__codelineno-81-2"></a><span class="kn">from</span><span class="w"> </span><span class="nn">datetime</span><span class="w"> </span><span class="kn">import</span> <span class="n">datetime</span><span class="p">,</span> <span class="n">timedelta</span> |
| <a id="__codelineno-81-3" name="__codelineno-81-3" href="#__codelineno-81-3"></a><span class="n">table</span><span class="o">.</span><span class="n">maintenance</span><span class="o">.</span><span class="n">expire_snapshots</span><span class="p">()</span><span class="o">.</span><span class="n">older_than</span><span class="p">(</span> |
| <a id="__codelineno-81-4" name="__codelineno-81-4" href="#__codelineno-81-4"></a> <span class="n">datetime</span><span class="o">.</span><span class="n">now</span><span class="p">()</span> <span class="o">-</span> <span class="n">timedelta</span><span class="p">(</span><span class="n">days</span><span class="o">=</span><span class="mi">3</span><span class="p">)</span> |
| <a id="__codelineno-81-5" name="__codelineno-81-5" href="#__codelineno-81-5"></a><span class="p">)</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span> |
| <a id="__codelineno-81-6" name="__codelineno-81-6" href="#__codelineno-81-6"></a> |
| <a id="__codelineno-81-7" name="__codelineno-81-7" href="#__codelineno-81-7"></a><span class="c1"># Expire a specific snapshot by ID</span> |
| <a id="__codelineno-81-8" name="__codelineno-81-8" href="#__codelineno-81-8"></a><span class="n">table</span><span class="o">.</span><span class="n">maintenance</span><span class="o">.</span><span class="n">expire_snapshots</span><span class="p">()</span><span class="o">.</span><span class="n">by_id</span><span class="p">(</span><span class="mi">12345</span><span class="p">)</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span> |
| <a id="__codelineno-81-9" name="__codelineno-81-9" href="#__codelineno-81-9"></a> |
| <a id="__codelineno-81-10" name="__codelineno-81-10" href="#__codelineno-81-10"></a><span class="c1"># Context manager usage (recommended for multiple operations)</span> |
| <a id="__codelineno-81-11" name="__codelineno-81-11" href="#__codelineno-81-11"></a><span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">maintenance</span><span class="o">.</span><span class="n">expire_snapshots</span><span class="p">()</span> <span class="k">as</span> <span class="n">expire</span><span class="p">:</span> |
| <a id="__codelineno-81-12" name="__codelineno-81-12" href="#__codelineno-81-12"></a> <span class="n">expire</span><span class="o">.</span><span class="n">by_id</span><span class="p">(</span><span class="mi">12345</span><span class="p">)</span> |
| <a id="__codelineno-81-13" name="__codelineno-81-13" href="#__codelineno-81-13"></a> <span class="n">expire</span><span class="o">.</span><span class="n">by_id</span><span class="p">(</span><span class="mi">67890</span><span class="p">)</span> |
| <a id="__codelineno-81-14" name="__codelineno-81-14" href="#__codelineno-81-14"></a> <span class="c1"># Automatically commits when exiting the context</span> |
| </code></pre></div> |
| <h4 id="real-world-example">Real-world Example<a class="headerlink" href="#real-world-example" title="Permanent link">¶</a></h4> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-82-1" name="__codelineno-82-1" href="#__codelineno-82-1"></a><span class="k">def</span><span class="w"> </span><span class="nf">cleanup_old_snapshots</span><span class="p">(</span><span class="n">table_name</span><span class="p">:</span> <span class="nb">str</span><span class="p">,</span> <span class="n">snapshot_ids</span><span class="p">:</span> <span class="nb">list</span><span class="p">[</span><span class="nb">int</span><span class="p">]):</span> |
| <a id="__codelineno-82-2" name="__codelineno-82-2" href="#__codelineno-82-2"></a><span class="w"> </span><span class="sd">"""Remove specific snapshots from a table."""</span> |
| <a id="__codelineno-82-3" name="__codelineno-82-3" href="#__codelineno-82-3"></a> <span class="n">catalog</span> <span class="o">=</span> <span class="n">load_catalog</span><span class="p">(</span><span class="s2">"production"</span><span class="p">)</span> |
| <a id="__codelineno-82-4" name="__codelineno-82-4" href="#__codelineno-82-4"></a> <span class="n">table</span> <span class="o">=</span> <span class="n">catalog</span><span class="o">.</span><span class="n">load_table</span><span class="p">(</span><span class="n">table_name</span><span class="p">)</span> |
| <a id="__codelineno-82-5" name="__codelineno-82-5" href="#__codelineno-82-5"></a> |
| <a id="__codelineno-82-6" name="__codelineno-82-6" href="#__codelineno-82-6"></a> <span class="c1"># Use context manager for safe transaction handling</span> |
| <a id="__codelineno-82-7" name="__codelineno-82-7" href="#__codelineno-82-7"></a> <span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">maintenance</span><span class="o">.</span><span class="n">expire_snapshots</span><span class="p">()</span> <span class="k">as</span> <span class="n">expire</span><span class="p">:</span> |
| <a id="__codelineno-82-8" name="__codelineno-82-8" href="#__codelineno-82-8"></a> <span class="k">for</span> <span class="n">snapshot_id</span> <span class="ow">in</span> <span class="n">snapshot_ids</span><span class="p">:</span> |
| <a id="__codelineno-82-9" name="__codelineno-82-9" href="#__codelineno-82-9"></a> <span class="n">expire</span><span class="o">.</span><span class="n">by_id</span><span class="p">(</span><span class="n">snapshot_id</span><span class="p">)</span> |
| <a id="__codelineno-82-10" name="__codelineno-82-10" href="#__codelineno-82-10"></a> |
| <a id="__codelineno-82-11" name="__codelineno-82-11" href="#__codelineno-82-11"></a> <span class="nb">print</span><span class="p">(</span><span class="sa">f</span><span class="s2">"Expired </span><span class="si">{</span><span class="nb">len</span><span class="p">(</span><span class="n">snapshot_ids</span><span class="p">)</span><span class="si">}</span><span class="s2"> snapshots from </span><span class="si">{</span><span class="n">table_name</span><span class="si">}</span><span class="s2">"</span><span class="p">)</span> |
| <a id="__codelineno-82-12" name="__codelineno-82-12" href="#__codelineno-82-12"></a> |
| <a id="__codelineno-82-13" name="__codelineno-82-13" href="#__codelineno-82-13"></a><span class="c1"># Usage</span> |
| <a id="__codelineno-82-14" name="__codelineno-82-14" href="#__codelineno-82-14"></a><span class="n">cleanup_old_snapshots</span><span class="p">(</span><span class="s2">"analytics.user_events"</span><span class="p">,</span> <span class="p">[</span><span class="mi">12345</span><span class="p">,</span> <span class="mi">67890</span><span class="p">,</span> <span class="mi">11111</span><span class="p">])</span> |
| </code></pre></div> |
| <h2 id="views">Views<a class="headerlink" href="#views" title="Permanent link">¶</a></h2> |
| <p>PyIceberg supports view operations.</p> |
| <h3 id="check-if-a-view-exists">Check if a view exists<a class="headerlink" href="#check-if-a-view-exists" title="Permanent link">¶</a></h3> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-83-1" name="__codelineno-83-1" href="#__codelineno-83-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.catalog</span><span class="w"> </span><span class="kn">import</span> <span class="n">load_catalog</span> |
| <a id="__codelineno-83-2" name="__codelineno-83-2" href="#__codelineno-83-2"></a> |
| <a id="__codelineno-83-3" name="__codelineno-83-3" href="#__codelineno-83-3"></a><span class="n">catalog</span> <span class="o">=</span> <span class="n">load_catalog</span><span class="p">(</span><span class="s2">"default"</span><span class="p">)</span> |
| <a id="__codelineno-83-4" name="__codelineno-83-4" href="#__codelineno-83-4"></a><span class="n">catalog</span><span class="o">.</span><span class="n">view_exists</span><span class="p">(</span><span class="s2">"default.bar"</span><span class="p">)</span> |
| </code></pre></div> |
| <h2 id="table-statistics-management">Table Statistics Management<a class="headerlink" href="#table-statistics-management" title="Permanent link">¶</a></h2> |
| <p>Manage table statistics with operations through the <code>Table</code> API:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-84-1" name="__codelineno-84-1" href="#__codelineno-84-1"></a><span class="c1"># To run a specific operation</span> |
| <a id="__codelineno-84-2" name="__codelineno-84-2" href="#__codelineno-84-2"></a><span class="n">table</span><span class="o">.</span><span class="n">update_statistics</span><span class="p">()</span><span class="o">.</span><span class="n">set_statistics</span><span class="p">(</span><span class="n">statistics_file</span><span class="o">=</span><span class="n">statistics_file</span><span class="p">)</span><span class="o">.</span><span class="n">commit</span><span class="p">()</span> |
| <a id="__codelineno-84-3" name="__codelineno-84-3" href="#__codelineno-84-3"></a><span class="c1"># To run multiple operations</span> |
| <a id="__codelineno-84-4" name="__codelineno-84-4" href="#__codelineno-84-4"></a><span class="n">table</span><span class="o">.</span><span class="n">update_statistics</span><span class="p">()</span> |
| <a id="__codelineno-84-5" name="__codelineno-84-5" href="#__codelineno-84-5"></a> <span class="o">.</span><span class="n">set_statistics</span><span class="p">(</span><span class="n">statistics_file1</span><span class="p">)</span> |
| <a id="__codelineno-84-6" name="__codelineno-84-6" href="#__codelineno-84-6"></a> <span class="o">.</span><span class="n">remove_statistics</span><span class="p">(</span><span class="n">snapshot_id2</span><span class="p">)</span> |
| <a id="__codelineno-84-7" name="__codelineno-84-7" href="#__codelineno-84-7"></a> <span class="o">.</span><span class="n">commit</span><span class="p">()</span> |
| <a id="__codelineno-84-8" name="__codelineno-84-8" href="#__codelineno-84-8"></a><span class="c1"># Operations are applied on commit.</span> |
| </code></pre></div> |
| <p>You can also use context managers to make more changes:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-85-1" name="__codelineno-85-1" href="#__codelineno-85-1"></a><span class="k">with</span> <span class="n">table</span><span class="o">.</span><span class="n">update_statistics</span><span class="p">()</span> <span class="k">as</span> <span class="n">update</span><span class="p">:</span> |
| <a id="__codelineno-85-2" name="__codelineno-85-2" href="#__codelineno-85-2"></a> <span class="n">update</span><span class="o">.</span><span class="n">set_statistics</span><span class="p">(</span><span class="n">statistics_file</span><span class="p">)</span> |
| <a id="__codelineno-85-3" name="__codelineno-85-3" href="#__codelineno-85-3"></a> <span class="n">update</span><span class="o">.</span><span class="n">remove_statistics</span><span class="p">(</span><span class="n">snapshot_id2</span><span class="p">)</span> |
| </code></pre></div> |
| <h2 id="query-the-data">Query the data<a class="headerlink" href="#query-the-data" title="Permanent link">¶</a></h2> |
| <p>To query a table, a table scan is needed. A table scan accepts a filter, columns, optionally a limit and a snapshot ID:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-86-1" name="__codelineno-86-1" href="#__codelineno-86-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.catalog</span><span class="w"> </span><span class="kn">import</span> <span class="n">load_catalog</span> |
| <a id="__codelineno-86-2" name="__codelineno-86-2" href="#__codelineno-86-2"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.expressions</span><span class="w"> </span><span class="kn">import</span> <span class="n">GreaterThanOrEqual</span> |
| <a id="__codelineno-86-3" name="__codelineno-86-3" href="#__codelineno-86-3"></a> |
| <a id="__codelineno-86-4" name="__codelineno-86-4" href="#__codelineno-86-4"></a><span class="n">catalog</span> <span class="o">=</span> <span class="n">load_catalog</span><span class="p">(</span><span class="s2">"default"</span><span class="p">)</span> |
| <a id="__codelineno-86-5" name="__codelineno-86-5" href="#__codelineno-86-5"></a><span class="n">table</span> <span class="o">=</span> <span class="n">catalog</span><span class="o">.</span><span class="n">load_table</span><span class="p">(</span><span class="s2">"nyc.taxis"</span><span class="p">)</span> |
| <a id="__codelineno-86-6" name="__codelineno-86-6" href="#__codelineno-86-6"></a> |
| <a id="__codelineno-86-7" name="__codelineno-86-7" href="#__codelineno-86-7"></a><span class="n">scan</span> <span class="o">=</span> <span class="n">table</span><span class="o">.</span><span class="n">scan</span><span class="p">(</span> |
| <a id="__codelineno-86-8" name="__codelineno-86-8" href="#__codelineno-86-8"></a> <span class="n">row_filter</span><span class="o">=</span><span class="n">GreaterThanOrEqual</span><span class="p">(</span><span class="s2">"trip_distance"</span><span class="p">,</span> <span class="mf">10.0</span><span class="p">),</span> |
| <a id="__codelineno-86-9" name="__codelineno-86-9" href="#__codelineno-86-9"></a> <span class="n">selected_fields</span><span class="o">=</span><span class="p">(</span><span class="s2">"VendorID"</span><span class="p">,</span> <span class="s2">"tpep_pickup_datetime"</span><span class="p">,</span> <span class="s2">"tpep_dropoff_datetime"</span><span class="p">),</span> |
| <a id="__codelineno-86-10" name="__codelineno-86-10" href="#__codelineno-86-10"></a> <span class="n">limit</span><span class="o">=</span><span class="mi">100</span><span class="p">,</span> |
| <a id="__codelineno-86-11" name="__codelineno-86-11" href="#__codelineno-86-11"></a><span class="p">)</span> |
| <a id="__codelineno-86-12" name="__codelineno-86-12" href="#__codelineno-86-12"></a> |
| <a id="__codelineno-86-13" name="__codelineno-86-13" href="#__codelineno-86-13"></a><span class="c1"># Or filter using a string predicate</span> |
| <a id="__codelineno-86-14" name="__codelineno-86-14" href="#__codelineno-86-14"></a><span class="n">scan</span> <span class="o">=</span> <span class="n">table</span><span class="o">.</span><span class="n">scan</span><span class="p">(</span> |
| <a id="__codelineno-86-15" name="__codelineno-86-15" href="#__codelineno-86-15"></a> <span class="n">row_filter</span><span class="o">=</span><span class="s2">"trip_distance > 10.0"</span><span class="p">,</span> |
| <a id="__codelineno-86-16" name="__codelineno-86-16" href="#__codelineno-86-16"></a><span class="p">)</span> |
| <a id="__codelineno-86-17" name="__codelineno-86-17" href="#__codelineno-86-17"></a> |
| <a id="__codelineno-86-18" name="__codelineno-86-18" href="#__codelineno-86-18"></a><span class="p">[</span><span class="n">task</span><span class="o">.</span><span class="n">file</span><span class="o">.</span><span class="n">file_path</span> <span class="k">for</span> <span class="n">task</span> <span class="ow">in</span> <span class="n">scan</span><span class="o">.</span><span class="n">plan_files</span><span class="p">()]</span> |
| </code></pre></div> |
| <p>The low-level API method <code>plan_files</code> returns a set of tasks that provide the files that might contain matching rows:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-87-1" name="__codelineno-87-1" href="#__codelineno-87-1"></a><span class="p">[</span> |
| <a id="__codelineno-87-2" name="__codelineno-87-2" href="#__codelineno-87-2"></a><span class="w"> </span><span class="s2">"s3://warehouse/wh/nyc/taxis/data/00003-4-42464649-92dd-41ad-b83b-dea1a2fe4b58-00001.parquet"</span> |
| <a id="__codelineno-87-3" name="__codelineno-87-3" href="#__codelineno-87-3"></a><span class="p">]</span> |
| </code></pre></div> |
| <p>In this case it is up to the engine itself to filter the files. The <code>to_arrow()</code> and <code>to_duckdb()</code> methods described below already do this for you.</p> |
| <h3 id="apache-arrow">Apache Arrow<a class="headerlink" href="#apache-arrow" title="Permanent link">¶</a></h3> |
| <!-- prettier-ignore-start --> |
| |
| <div class="admonition note"> |
| <p class="admonition-title">Requirements</p> |
| <p>This requires <a href="../"><code>pyarrow</code> to be installed</a>.</p> |
| </div> |
| <!-- prettier-ignore-end --> |
| |
| <p>Using PyIceberg it is easy to filter out data from a huge table and pull it into a PyArrow table:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-88-1" name="__codelineno-88-1" href="#__codelineno-88-1"></a><span class="n">table</span><span class="o">.</span><span class="n">scan</span><span class="p">(</span> |
| <a id="__codelineno-88-2" name="__codelineno-88-2" href="#__codelineno-88-2"></a> <span class="n">row_filter</span><span class="o">=</span><span class="n">GreaterThanOrEqual</span><span class="p">(</span><span class="s2">"trip_distance"</span><span class="p">,</span> <span class="mf">10.0</span><span class="p">),</span> |
| <a id="__codelineno-88-3" name="__codelineno-88-3" href="#__codelineno-88-3"></a> <span class="n">selected_fields</span><span class="o">=</span><span class="p">(</span><span class="s2">"VendorID"</span><span class="p">,</span> <span class="s2">"tpep_pickup_datetime"</span><span class="p">,</span> <span class="s2">"tpep_dropoff_datetime"</span><span class="p">),</span> |
| <a id="__codelineno-88-4" name="__codelineno-88-4" href="#__codelineno-88-4"></a><span class="p">)</span><span class="o">.</span><span class="n">to_arrow</span><span class="p">()</span> |
| </code></pre></div> |
| <p>This will return a PyArrow table:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-89-1" name="__codelineno-89-1" href="#__codelineno-89-1"></a><span class="n">pyarrow</span><span class="o">.</span><span class="n">Table</span> |
| <a id="__codelineno-89-2" name="__codelineno-89-2" href="#__codelineno-89-2"></a><span class="n">VendorID</span><span class="p">:</span> <span class="n">int64</span> |
| <a id="__codelineno-89-3" name="__codelineno-89-3" href="#__codelineno-89-3"></a><span class="n">tpep_pickup_datetime</span><span class="p">:</span> <span class="n">timestamp</span><span class="p">[</span><span class="n">us</span><span class="p">,</span> <span class="n">tz</span><span class="o">=+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span><span class="p">]</span> |
| <a id="__codelineno-89-4" name="__codelineno-89-4" href="#__codelineno-89-4"></a><span class="n">tpep_dropoff_datetime</span><span class="p">:</span> <span class="n">timestamp</span><span class="p">[</span><span class="n">us</span><span class="p">,</span> <span class="n">tz</span><span class="o">=+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span><span class="p">]</span> |
| <a id="__codelineno-89-5" name="__codelineno-89-5" href="#__codelineno-89-5"></a><span class="o">----</span> |
| <a id="__codelineno-89-6" name="__codelineno-89-6" href="#__codelineno-89-6"></a><span class="n">VendorID</span><span class="p">:</span> <span class="p">[[</span><span class="mi">2</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">2</span><span class="p">],[</span><span class="mi">2</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">1</span><span class="p">,</span><span class="mi">2</span><span class="p">],</span><span class="o">...</span><span class="p">,[</span><span class="mi">2</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">6</span><span class="p">,</span><span class="mi">6</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">2</span><span class="p">],[</span><span class="mi">2</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span 
class="mi">2</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">2</span><span class="p">,</span><span class="mi">2</span><span class="p">]]</span> |
| <a id="__codelineno-89-7" name="__codelineno-89-7" href="#__codelineno-89-7"></a><span class="n">tpep_pickup_datetime</span><span class="p">:</span> <span class="p">[[</span><span class="mi">2021</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">28</span><span class="p">:</span><span class="mf">05.000000</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">2021</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">30</span> <span class="mi">23</span><span class="p">:</span><span class="mi">44</span><span class="p">:</span><span class="mf">25.000000</span><span class="p">]]</span> |
| <a id="__codelineno-89-8" name="__codelineno-89-8" href="#__codelineno-89-8"></a><span class="n">tpep_dropoff_datetime</span><span class="p">:</span> <span class="p">[[</span><span class="mi">2021</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">47</span><span class="p">:</span><span class="mf">59.000000</span><span class="p">,</span><span class="o">...</span><span class="p">,</span><span class="mi">2021</span><span class="o">-</span><span class="mi">05</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">14</span><span class="p">:</span><span class="mf">47.000000</span><span class="p">]]</span> |
| </code></pre></div> |
| <p>This will only pull in the files that might contain matching rows.</p> |
| <p>One can also return a PyArrow RecordBatchReader, if reading one record batch at a time is preferred:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-90-1" name="__codelineno-90-1" href="#__codelineno-90-1"></a><span class="n">table</span><span class="o">.</span><span class="n">scan</span><span class="p">(</span> |
| <a id="__codelineno-90-2" name="__codelineno-90-2" href="#__codelineno-90-2"></a> <span class="n">row_filter</span><span class="o">=</span><span class="n">GreaterThanOrEqual</span><span class="p">(</span><span class="s2">"trip_distance"</span><span class="p">,</span> <span class="mf">10.0</span><span class="p">),</span> |
| <a id="__codelineno-90-3" name="__codelineno-90-3" href="#__codelineno-90-3"></a> <span class="n">selected_fields</span><span class="o">=</span><span class="p">(</span><span class="s2">"VendorID"</span><span class="p">,</span> <span class="s2">"tpep_pickup_datetime"</span><span class="p">,</span> <span class="s2">"tpep_dropoff_datetime"</span><span class="p">),</span> |
| <a id="__codelineno-90-4" name="__codelineno-90-4" href="#__codelineno-90-4"></a><span class="p">)</span><span class="o">.</span><span class="n">to_arrow_batch_reader</span><span class="p">()</span> |
| </code></pre></div> |
| <h3 id="pandas">Pandas<a class="headerlink" href="#pandas" title="Permanent link">¶</a></h3> |
| <!-- prettier-ignore-start --> |
| |
| <div class="admonition note"> |
| <p class="admonition-title">Requirements</p> |
| <p>This requires <a href="../"><code>pandas</code> to be installed</a>.</p> |
| </div> |
| <!-- prettier-ignore-end --> |
| |
| <p>PyIceberg makes it easy to filter out data from a huge table and pull it into a Pandas dataframe locally. This will only fetch the relevant Parquet files for the query and apply the filter. This will reduce IO and therefore improve performance and reduce cost.</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-91-1" name="__codelineno-91-1" href="#__codelineno-91-1"></a><span class="n">table</span><span class="o">.</span><span class="n">scan</span><span class="p">(</span> |
| <a id="__codelineno-91-2" name="__codelineno-91-2" href="#__codelineno-91-2"></a> <span class="n">row_filter</span><span class="o">=</span><span class="s2">"trip_distance >= 10.0"</span><span class="p">,</span> |
| <a id="__codelineno-91-3" name="__codelineno-91-3" href="#__codelineno-91-3"></a> <span class="n">selected_fields</span><span class="o">=</span><span class="p">(</span><span class="s2">"VendorID"</span><span class="p">,</span> <span class="s2">"tpep_pickup_datetime"</span><span class="p">,</span> <span class="s2">"tpep_dropoff_datetime"</span><span class="p">),</span> |
| <a id="__codelineno-91-4" name="__codelineno-91-4" href="#__codelineno-91-4"></a><span class="p">)</span><span class="o">.</span><span class="n">to_pandas</span><span class="p">()</span> |
| </code></pre></div> |
| <p>This will return a Pandas dataframe:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-92-1" name="__codelineno-92-1" href="#__codelineno-92-1"></a> <span class="n">VendorID</span> <span class="n">tpep_pickup_datetime</span> <span class="n">tpep_dropoff_datetime</span> |
| <a id="__codelineno-92-2" name="__codelineno-92-2" href="#__codelineno-92-2"></a><span class="mi">0</span> <span class="mi">2</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">28</span><span class="p">:</span><span class="mi">05</span><span class="o">+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">47</span><span class="p">:</span><span class="mi">59</span><span class="o">+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span> |
| <a id="__codelineno-92-3" name="__codelineno-92-3" href="#__codelineno-92-3"></a><span class="mi">1</span> <span class="mi">1</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">39</span><span class="p">:</span><span class="mi">01</span><span class="o">+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">57</span><span class="p">:</span><span class="mi">39</span><span class="o">+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span> |
| <a id="__codelineno-92-4" name="__codelineno-92-4" href="#__codelineno-92-4"></a><span class="mi">2</span> <span class="mi">2</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">14</span><span class="p">:</span><span class="mi">42</span><span class="o">+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">42</span><span class="p">:</span><span class="mi">59</span><span class="o">+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span> |
| <a id="__codelineno-92-5" name="__codelineno-92-5" href="#__codelineno-92-5"></a><span class="mi">3</span> <span class="mi">1</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">17</span><span class="p">:</span><span class="mi">17</span><span class="o">+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">43</span><span class="p">:</span><span class="mi">38</span><span class="o">+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span> |
| <a id="__codelineno-92-6" name="__codelineno-92-6" href="#__codelineno-92-6"></a><span class="mi">4</span> <span class="mi">1</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">24</span><span class="p">:</span><span class="mi">04</span><span class="o">+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">56</span><span class="p">:</span><span class="mi">20</span><span class="o">+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span> |
| <a id="__codelineno-92-7" name="__codelineno-92-7" href="#__codelineno-92-7"></a><span class="o">...</span> <span class="o">...</span> <span class="o">...</span> <span class="o">...</span> |
| <a id="__codelineno-92-8" name="__codelineno-92-8" href="#__codelineno-92-8"></a><span class="mi">116976</span> <span class="mi">2</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">30</span> <span class="mi">23</span><span class="p">:</span><span class="mi">56</span><span class="p">:</span><span class="mi">18</span><span class="o">+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">05</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">29</span><span class="p">:</span><span class="mi">13</span><span class="o">+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span> |
| <a id="__codelineno-92-9" name="__codelineno-92-9" href="#__codelineno-92-9"></a><span class="mi">116977</span> <span class="mi">2</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">30</span> <span class="mi">23</span><span class="p">:</span><span class="mi">07</span><span class="p">:</span><span class="mi">41</span><span class="o">+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">30</span> <span class="mi">23</span><span class="p">:</span><span class="mi">37</span><span class="p">:</span><span class="mi">18</span><span class="o">+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span> |
| <a id="__codelineno-92-10" name="__codelineno-92-10" href="#__codelineno-92-10"></a><span class="mi">116978</span> <span class="mi">2</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">30</span> <span class="mi">23</span><span class="p">:</span><span class="mi">38</span><span class="p">:</span><span class="mi">28</span><span class="o">+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">05</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">12</span><span class="p">:</span><span class="mi">04</span><span class="o">+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span> |
| <a id="__codelineno-92-11" name="__codelineno-92-11" href="#__codelineno-92-11"></a><span class="mi">116979</span> <span class="mi">2</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">30</span> <span class="mi">23</span><span class="p">:</span><span class="mi">33</span><span class="p">:</span><span class="mi">00</span><span class="o">+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">30</span> <span class="mi">23</span><span class="p">:</span><span class="mi">59</span><span class="p">:</span><span class="mi">00</span><span class="o">+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span> |
| <a id="__codelineno-92-12" name="__codelineno-92-12" href="#__codelineno-92-12"></a><span class="mi">116980</span> <span class="mi">2</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">30</span> <span class="mi">23</span><span class="p">:</span><span class="mi">44</span><span class="p">:</span><span class="mi">25</span><span class="o">+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span> <span class="mi">2021</span><span class="o">-</span><span class="mi">05</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">14</span><span class="p">:</span><span class="mi">47</span><span class="o">+</span><span class="mi">00</span><span class="p">:</span><span class="mi">00</span> |
| <a id="__codelineno-92-13" name="__codelineno-92-13" href="#__codelineno-92-13"></a> |
| <a id="__codelineno-92-14" name="__codelineno-92-14" href="#__codelineno-92-14"></a><span class="p">[</span><span class="mi">116981</span> <span class="n">rows</span> <span class="n">x</span> <span class="mi">3</span> <span class="n">columns</span><span class="p">]</span> |
| </code></pre></div> |
| <p>It is recommended to use Pandas 2 or later, because it stores the data in an <a href="https://datapythonista.me/blog/pandas-20-and-the-arrow-revolution-part-i">Apache Arrow backend</a> which avoids copies of data.</p> |
| <h3 id="duckdb">DuckDB<a class="headerlink" href="#duckdb" title="Permanent link">¶</a></h3> |
| <!-- prettier-ignore-start --> |
| |
| <div class="admonition note"> |
| <p class="admonition-title">Requirements</p> |
| <p>This requires <a href="../">DuckDB to be installed</a>.</p> |
| </div> |
| <!-- prettier-ignore-end --> |
| |
| <p>A table scan can also be converted into an in-memory DuckDB table:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-93-1" name="__codelineno-93-1" href="#__codelineno-93-1"></a><span class="n">con</span> <span class="o">=</span> <span class="n">table</span><span class="o">.</span><span class="n">scan</span><span class="p">(</span> |
| <a id="__codelineno-93-2" name="__codelineno-93-2" href="#__codelineno-93-2"></a> <span class="n">row_filter</span><span class="o">=</span><span class="n">GreaterThanOrEqual</span><span class="p">(</span><span class="s2">"trip_distance"</span><span class="p">,</span> <span class="mf">10.0</span><span class="p">),</span> |
| <a id="__codelineno-93-3" name="__codelineno-93-3" href="#__codelineno-93-3"></a> <span class="n">selected_fields</span><span class="o">=</span><span class="p">(</span><span class="s2">"VendorID"</span><span class="p">,</span> <span class="s2">"tpep_pickup_datetime"</span><span class="p">,</span> <span class="s2">"tpep_dropoff_datetime"</span><span class="p">),</span> |
| <a id="__codelineno-93-4" name="__codelineno-93-4" href="#__codelineno-93-4"></a><span class="p">)</span><span class="o">.</span><span class="n">to_duckdb</span><span class="p">(</span><span class="n">table_name</span><span class="o">=</span><span class="s2">"distant_taxi_trips"</span><span class="p">)</span> |
| </code></pre></div> |
| <p>Using the cursor, we can run queries on the DuckDB table:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-94-1" name="__codelineno-94-1" href="#__codelineno-94-1"></a><span class="nb">print</span><span class="p">(</span> |
| <a id="__codelineno-94-2" name="__codelineno-94-2" href="#__codelineno-94-2"></a> <span class="n">con</span><span class="o">.</span><span class="n">execute</span><span class="p">(</span> |
| <a id="__codelineno-94-3" name="__codelineno-94-3" href="#__codelineno-94-3"></a> <span class="s2">"SELECT tpep_dropoff_datetime - tpep_pickup_datetime AS duration FROM distant_taxi_trips LIMIT 4"</span> |
| <a id="__codelineno-94-4" name="__codelineno-94-4" href="#__codelineno-94-4"></a> <span class="p">)</span><span class="o">.</span><span class="n">fetchall</span><span class="p">()</span> |
| <a id="__codelineno-94-5" name="__codelineno-94-5" href="#__codelineno-94-5"></a><span class="p">)</span> |
| <a id="__codelineno-94-6" name="__codelineno-94-6" href="#__codelineno-94-6"></a><span class="p">[</span> |
| <a id="__codelineno-94-7" name="__codelineno-94-7" href="#__codelineno-94-7"></a> <span class="p">(</span><span class="n">datetime</span><span class="o">.</span><span class="n">timedelta</span><span class="p">(</span><span class="n">seconds</span><span class="o">=</span><span class="mi">1194</span><span class="p">),),</span> |
| <a id="__codelineno-94-8" name="__codelineno-94-8" href="#__codelineno-94-8"></a> <span class="p">(</span><span class="n">datetime</span><span class="o">.</span><span class="n">timedelta</span><span class="p">(</span><span class="n">seconds</span><span class="o">=</span><span class="mi">1118</span><span class="p">),),</span> |
| <a id="__codelineno-94-9" name="__codelineno-94-9" href="#__codelineno-94-9"></a> <span class="p">(</span><span class="n">datetime</span><span class="o">.</span><span class="n">timedelta</span><span class="p">(</span><span class="n">seconds</span><span class="o">=</span><span class="mi">1697</span><span class="p">),),</span> |
| <a id="__codelineno-94-10" name="__codelineno-94-10" href="#__codelineno-94-10"></a> <span class="p">(</span><span class="n">datetime</span><span class="o">.</span><span class="n">timedelta</span><span class="p">(</span><span class="n">seconds</span><span class="o">=</span><span class="mi">1581</span><span class="p">),),</span> |
| <a id="__codelineno-94-11" name="__codelineno-94-11" href="#__codelineno-94-11"></a><span class="p">]</span> |
| </code></pre></div> |
| <h3 id="ray">Ray<a class="headerlink" href="#ray" title="Permanent link">¶</a></h3> |
| <!-- prettier-ignore-start --> |
| |
| <div class="admonition note"> |
| <p class="admonition-title">Requirements</p> |
| <p>This requires <a href="../">Ray to be installed</a>.</p> |
| </div> |
| <!-- prettier-ignore-end --> |
| |
| <p>A table scan can also be converted into a Ray dataset:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-95-1" name="__codelineno-95-1" href="#__codelineno-95-1"></a><span class="n">ray_dataset</span> <span class="o">=</span> <span class="n">table</span><span class="o">.</span><span class="n">scan</span><span class="p">(</span> |
| <a id="__codelineno-95-2" name="__codelineno-95-2" href="#__codelineno-95-2"></a> <span class="n">row_filter</span><span class="o">=</span><span class="n">GreaterThanOrEqual</span><span class="p">(</span><span class="s2">"trip_distance"</span><span class="p">,</span> <span class="mf">10.0</span><span class="p">),</span> |
| <a id="__codelineno-95-3" name="__codelineno-95-3" href="#__codelineno-95-3"></a> <span class="n">selected_fields</span><span class="o">=</span><span class="p">(</span><span class="s2">"VendorID"</span><span class="p">,</span> <span class="s2">"tpep_pickup_datetime"</span><span class="p">,</span> <span class="s2">"tpep_dropoff_datetime"</span><span class="p">),</span> |
| <a id="__codelineno-95-4" name="__codelineno-95-4" href="#__codelineno-95-4"></a><span class="p">)</span><span class="o">.</span><span class="n">to_ray</span><span class="p">()</span> |
| </code></pre></div> |
| <p>This will return a Ray dataset:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-96-1" name="__codelineno-96-1" href="#__codelineno-96-1"></a><span class="n">Dataset</span><span class="p">(</span> |
| <a id="__codelineno-96-2" name="__codelineno-96-2" href="#__codelineno-96-2"></a> <span class="n">num_blocks</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> |
| <a id="__codelineno-96-3" name="__codelineno-96-3" href="#__codelineno-96-3"></a> <span class="n">num_rows</span><span class="o">=</span><span class="mi">1168798</span><span class="p">,</span> |
| <a id="__codelineno-96-4" name="__codelineno-96-4" href="#__codelineno-96-4"></a> <span class="n">schema</span><span class="o">=</span><span class="p">{</span> |
| <a id="__codelineno-96-5" name="__codelineno-96-5" href="#__codelineno-96-5"></a> <span class="n">VendorID</span><span class="p">:</span> <span class="n">int64</span><span class="p">,</span> |
| <a id="__codelineno-96-6" name="__codelineno-96-6" href="#__codelineno-96-6"></a> <span class="n">tpep_pickup_datetime</span><span class="p">:</span> <span class="n">timestamp</span><span class="p">[</span><span class="n">us</span><span class="p">,</span> <span class="n">tz</span><span class="o">=</span><span class="n">UTC</span><span class="p">],</span> |
| <a id="__codelineno-96-7" name="__codelineno-96-7" href="#__codelineno-96-7"></a> <span class="n">tpep_dropoff_datetime</span><span class="p">:</span> <span class="n">timestamp</span><span class="p">[</span><span class="n">us</span><span class="p">,</span> <span class="n">tz</span><span class="o">=</span><span class="n">UTC</span><span class="p">]</span> |
| <a id="__codelineno-96-8" name="__codelineno-96-8" href="#__codelineno-96-8"></a> <span class="p">}</span> |
| <a id="__codelineno-96-9" name="__codelineno-96-9" href="#__codelineno-96-9"></a><span class="p">)</span> |
| </code></pre></div> |
| <p>Use the <a href="https://docs.ray.io/en/latest/data/api/dataset.html">Ray Dataset API</a> to interact with the dataset:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-97-1" name="__codelineno-97-1" href="#__codelineno-97-1"></a><span class="nb">print</span><span class="p">(</span><span class="n">ray_dataset</span><span class="o">.</span><span class="n">take</span><span class="p">(</span><span class="mi">2</span><span class="p">))</span> |
| <a id="__codelineno-97-2" name="__codelineno-97-2" href="#__codelineno-97-2"></a><span class="p">[</span> |
| <a id="__codelineno-97-3" name="__codelineno-97-3" href="#__codelineno-97-3"></a> <span class="p">{</span> |
| <a id="__codelineno-97-4" name="__codelineno-97-4" href="#__codelineno-97-4"></a> <span class="s2">"VendorID"</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> |
| <a id="__codelineno-97-5" name="__codelineno-97-5" href="#__codelineno-97-5"></a> <span class="s2">"tpep_pickup_datetime"</span><span class="p">:</span> <span class="n">datetime</span><span class="o">.</span><span class="n">datetime</span><span class="p">(</span><span class="mi">2008</span><span class="p">,</span> <span class="mi">12</span><span class="p">,</span> <span class="mi">31</span><span class="p">,</span> <span class="mi">23</span><span class="p">,</span> <span class="mi">23</span><span class="p">,</span> <span class="mi">50</span><span class="p">),</span> |
| <a id="__codelineno-97-6" name="__codelineno-97-6" href="#__codelineno-97-6"></a> <span class="s2">"tpep_dropoff_datetime"</span><span class="p">:</span> <span class="n">datetime</span><span class="o">.</span><span class="n">datetime</span><span class="p">(</span><span class="mi">2009</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">0</span><span class="p">,</span> <span class="mi">34</span><span class="p">,</span> <span class="mi">31</span><span class="p">),</span> |
| <a id="__codelineno-97-7" name="__codelineno-97-7" href="#__codelineno-97-7"></a> <span class="p">},</span> |
| <a id="__codelineno-97-8" name="__codelineno-97-8" href="#__codelineno-97-8"></a> <span class="p">{</span> |
| <a id="__codelineno-97-9" name="__codelineno-97-9" href="#__codelineno-97-9"></a> <span class="s2">"VendorID"</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> |
| <a id="__codelineno-97-10" name="__codelineno-97-10" href="#__codelineno-97-10"></a> <span class="s2">"tpep_pickup_datetime"</span><span class="p">:</span> <span class="n">datetime</span><span class="o">.</span><span class="n">datetime</span><span class="p">(</span><span class="mi">2008</span><span class="p">,</span> <span class="mi">12</span><span class="p">,</span> <span class="mi">31</span><span class="p">,</span> <span class="mi">23</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">3</span><span class="p">),</span> |
| <a id="__codelineno-97-11" name="__codelineno-97-11" href="#__codelineno-97-11"></a> <span class="s2">"tpep_dropoff_datetime"</span><span class="p">:</span> <span class="n">datetime</span><span class="o">.</span><span class="n">datetime</span><span class="p">(</span><span class="mi">2009</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">16</span><span class="p">,</span> <span class="mi">10</span><span class="p">,</span> <span class="mi">18</span><span class="p">),</span> |
| <a id="__codelineno-97-12" name="__codelineno-97-12" href="#__codelineno-97-12"></a> <span class="p">},</span> |
| <a id="__codelineno-97-13" name="__codelineno-97-13" href="#__codelineno-97-13"></a><span class="p">]</span> |
| </code></pre></div> |
| <h3 id="bodo">Bodo<a class="headerlink" href="#bodo" title="Permanent link">¶</a></h3> |
| <p>PyIceberg interfaces closely with Bodo Dataframes (see <a href="https://docs.bodo.ai/latest/quick_start/quickstart_local_iceberg/">Bodo Iceberg Quick Start</a>), |
| which provides a drop-in replacement for Pandas that applies query, compiler and HPC optimizations automatically. |
| Bodo accelerates and scales Python code from single laptops to large clusters without code rewrites.</p> |
| <!-- prettier-ignore-start --> |
| |
| <div class="admonition note"> |
| <p class="admonition-title">Requirements</p> |
| <p>This requires <a href="../"><code>bodo</code> to be installed</a>.</p> |
| </div> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-98-1" name="__codelineno-98-1" href="#__codelineno-98-1"></a><span class="n">pip</span> <span class="n">install</span> <span class="n">pyiceberg</span><span class="p">[</span><span class="s1">'bodo'</span><span class="p">]</span> |
| </code></pre></div> |
| <!-- prettier-ignore-end --> |
| |
| <p>A table can be read easily into a Bodo Dataframe to perform Pandas operations:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-99-1" name="__codelineno-99-1" href="#__codelineno-99-1"></a><span class="n">df</span> <span class="o">=</span> <span class="n">table</span><span class="o">.</span><span class="n">to_bodo</span><span class="p">()</span> <span class="c1"># equivalent to `bodo.pandas.read_iceberg_table(table)`</span> |
| <a id="__codelineno-99-2" name="__codelineno-99-2" href="#__codelineno-99-2"></a><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[</span><span class="n">df</span><span class="p">[</span><span class="s2">"trip_distance"</span><span class="p">]</span> <span class="o">>=</span> <span class="mf">10.0</span><span class="p">]</span> |
| <a id="__codelineno-99-3" name="__codelineno-99-3" href="#__codelineno-99-3"></a><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="p">[[</span><span class="s2">"VendorID"</span><span class="p">,</span> <span class="s2">"tpep_pickup_datetime"</span><span class="p">,</span> <span class="s2">"tpep_dropoff_datetime"</span><span class="p">]]</span> |
| <a id="__codelineno-99-4" name="__codelineno-99-4" href="#__codelineno-99-4"></a><span class="nb">print</span><span class="p">(</span><span class="n">df</span><span class="p">)</span> |
| </code></pre></div> |
| <p>This creates a lazy query, optimizes it, and runs it on all available cores (print triggers execution):</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-100-1" name="__codelineno-100-1" href="#__codelineno-100-1"></a> <span class="n">VendorID</span> <span class="n">tpep_pickup_datetime</span> <span class="n">tpep_dropoff_datetime</span> |
| <a id="__codelineno-100-2" name="__codelineno-100-2" href="#__codelineno-100-2"></a><span class="mi">0</span> <span class="mi">2</span> <span class="mi">2023</span><span class="o">-</span><span class="mi">01</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">27</span><span class="p">:</span><span class="mi">12</span> <span class="mi">2023</span><span class="o">-</span><span class="mi">01</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">49</span><span class="p">:</span><span class="mi">56</span> |
| <a id="__codelineno-100-3" name="__codelineno-100-3" href="#__codelineno-100-3"></a><span class="mi">1</span> <span class="mi">2</span> <span class="mi">2023</span><span class="o">-</span><span class="mi">01</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">09</span><span class="p">:</span><span class="mi">29</span> <span class="mi">2023</span><span class="o">-</span><span class="mi">01</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">29</span><span class="p">:</span><span class="mi">23</span> |
| <a id="__codelineno-100-4" name="__codelineno-100-4" href="#__codelineno-100-4"></a><span class="mi">2</span> <span class="mi">1</span> <span class="mi">2023</span><span class="o">-</span><span class="mi">01</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">13</span><span class="p">:</span><span class="mi">30</span> <span class="mi">2023</span><span class="o">-</span><span class="mi">01</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">44</span><span class="p">:</span><span class="mi">00</span> |
| <a id="__codelineno-100-5" name="__codelineno-100-5" href="#__codelineno-100-5"></a><span class="mi">3</span> <span class="mi">2</span> <span class="mi">2023</span><span class="o">-</span><span class="mi">01</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">41</span><span class="p">:</span><span class="mi">41</span> <span class="mi">2023</span><span class="o">-</span><span class="mi">01</span><span class="o">-</span><span class="mi">01</span> <span class="mi">01</span><span class="p">:</span><span class="mi">19</span><span class="p">:</span><span class="mi">32</span> |
| <a id="__codelineno-100-6" name="__codelineno-100-6" href="#__codelineno-100-6"></a><span class="mi">4</span> <span class="mi">2</span> <span class="mi">2023</span><span class="o">-</span><span class="mi">01</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">22</span><span class="p">:</span><span class="mi">39</span> <span class="mi">2023</span><span class="o">-</span><span class="mi">01</span><span class="o">-</span><span class="mi">01</span> <span class="mi">01</span><span class="p">:</span><span class="mi">30</span><span class="p">:</span><span class="mi">45</span> |
| <a id="__codelineno-100-7" name="__codelineno-100-7" href="#__codelineno-100-7"></a><span class="o">...</span> <span class="o">...</span> <span class="o">...</span> <span class="o">...</span> |
| <a id="__codelineno-100-8" name="__codelineno-100-8" href="#__codelineno-100-8"></a><span class="mi">245478</span> <span class="mi">2</span> <span class="mi">2023</span><span class="o">-</span><span class="mi">01</span><span class="o">-</span><span class="mi">31</span> <span class="mi">22</span><span class="p">:</span><span class="mi">32</span><span class="p">:</span><span class="mi">57</span> <span class="mi">2023</span><span class="o">-</span><span class="mi">01</span><span class="o">-</span><span class="mi">31</span> <span class="mi">23</span><span class="p">:</span><span class="mi">01</span><span class="p">:</span><span class="mi">48</span> |
| <a id="__codelineno-100-9" name="__codelineno-100-9" href="#__codelineno-100-9"></a><span class="mi">245479</span> <span class="mi">2</span> <span class="mi">2023</span><span class="o">-</span><span class="mi">01</span><span class="o">-</span><span class="mi">31</span> <span class="mi">22</span><span class="p">:</span><span class="mi">03</span><span class="p">:</span><span class="mi">26</span> <span class="mi">2023</span><span class="o">-</span><span class="mi">01</span><span class="o">-</span><span class="mi">31</span> <span class="mi">22</span><span class="p">:</span><span class="mi">46</span><span class="p">:</span><span class="mi">13</span> |
| <a id="__codelineno-100-10" name="__codelineno-100-10" href="#__codelineno-100-10"></a><span class="mi">245480</span> <span class="mi">2</span> <span class="mi">2023</span><span class="o">-</span><span class="mi">01</span><span class="o">-</span><span class="mi">31</span> <span class="mi">23</span><span class="p">:</span><span class="mi">25</span><span class="p">:</span><span class="mi">56</span> <span class="mi">2023</span><span class="o">-</span><span class="mi">02</span><span class="o">-</span><span class="mi">01</span> <span class="mi">00</span><span class="p">:</span><span class="mi">05</span><span class="p">:</span><span class="mi">42</span> |
| <a id="__codelineno-100-11" name="__codelineno-100-11" href="#__codelineno-100-11"></a><span class="mi">245481</span> <span class="mi">2</span> <span class="mi">2023</span><span class="o">-</span><span class="mi">01</span><span class="o">-</span><span class="mi">31</span> <span class="mi">23</span><span class="p">:</span><span class="mi">18</span><span class="p">:</span><span class="mi">00</span> <span class="mi">2023</span><span class="o">-</span><span class="mi">01</span><span class="o">-</span><span class="mi">31</span> <span class="mi">23</span><span class="p">:</span><span class="mi">46</span><span class="p">:</span><span class="mi">00</span> |
| <a id="__codelineno-100-12" name="__codelineno-100-12" href="#__codelineno-100-12"></a><span class="mi">245482</span> <span class="mi">2</span> <span class="mi">2023</span><span class="o">-</span><span class="mi">01</span><span class="o">-</span><span class="mi">31</span> <span class="mi">23</span><span class="p">:</span><span class="mi">18</span><span class="p">:</span><span class="mi">00</span> <span class="mi">2023</span><span class="o">-</span><span class="mi">01</span><span class="o">-</span><span class="mi">31</span> <span class="mi">23</span><span class="p">:</span><span class="mi">41</span><span class="p">:</span><span class="mi">00</span> |
| <a id="__codelineno-100-13" name="__codelineno-100-13" href="#__codelineno-100-13"></a> |
| <a id="__codelineno-100-14" name="__codelineno-100-14" href="#__codelineno-100-14"></a><span class="p">[</span><span class="mi">245483</span> <span class="n">rows</span> <span class="n">x</span> <span class="mi">3</span> <span class="n">columns</span><span class="p">]</span> |
| </code></pre></div> |
| <p>Bodo is optimized to take advantage of Iceberg features such as hidden partitioning and various statistics for efficient reads.</p> |
| <h3 id="daft">Daft<a class="headerlink" href="#daft" title="Permanent link">¶</a></h3> |
| <p>PyIceberg interfaces closely with Daft Dataframes (see also: <a href="https://docs.daft.ai/en/stable/io/iceberg/">Daft integration with Iceberg</a>) which provides a full lazily optimized query engine interface on top of PyIceberg tables.</p> |
| <!-- prettier-ignore-start --> |
| |
| <div class="admonition note"> |
| <p class="admonition-title">Requirements</p> |
| <p>This requires <a href="../">Daft to be installed</a>.</p> |
| </div> |
| <!-- prettier-ignore-end --> |
| |
| <p>A table can be read easily into a Daft Dataframe:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-101-1" name="__codelineno-101-1" href="#__codelineno-101-1"></a><span class="n">df</span> <span class="o">=</span> <span class="n">table</span><span class="o">.</span><span class="n">to_daft</span><span class="p">()</span> <span class="c1"># equivalent to `daft.read_iceberg(table)`</span> |
| <a id="__codelineno-101-2" name="__codelineno-101-2" href="#__codelineno-101-2"></a><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">where</span><span class="p">(</span><span class="n">df</span><span class="p">[</span><span class="s2">"trip_distance"</span><span class="p">]</span> <span class="o">>=</span> <span class="mf">10.0</span><span class="p">)</span> |
| <a id="__codelineno-101-3" name="__codelineno-101-3" href="#__codelineno-101-3"></a><span class="n">df</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">select</span><span class="p">(</span><span class="s2">"VendorID"</span><span class="p">,</span> <span class="s2">"tpep_pickup_datetime"</span><span class="p">,</span> <span class="s2">"tpep_dropoff_datetime"</span><span class="p">)</span> |
| </code></pre></div> |
| <p>This returns a Daft Dataframe which is lazily materialized. Printing <code>df</code> will display the schema:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-102-1" name="__codelineno-102-1" href="#__codelineno-102-1"></a><span class="err">╭──────────┬───────────────────────────────┬───────────────────────────────╮</span> |
| <a id="__codelineno-102-2" name="__codelineno-102-2" href="#__codelineno-102-2"></a><span class="err">│</span> <span class="n">VendorID</span> <span class="err">┆</span> <span class="n">tpep_pickup_datetime</span> <span class="err">┆</span> <span class="n">tpep_dropoff_datetime</span> <span class="err">│</span> |
| <a id="__codelineno-102-3" name="__codelineno-102-3" href="#__codelineno-102-3"></a><span class="err">│</span> <span class="o">---</span> <span class="err">┆</span> <span class="o">---</span> <span class="err">┆</span> <span class="o">---</span> <span class="err">│</span> |
| <a id="__codelineno-102-4" name="__codelineno-102-4" href="#__codelineno-102-4"></a><span class="err">│</span> <span class="n">Int64</span> <span class="err">┆</span> <span class="n">Timestamp</span><span class="p">(</span><span class="n">Microseconds</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> <span class="err">┆</span> <span class="n">Timestamp</span><span class="p">(</span><span class="n">Microseconds</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> <span class="err">│</span> |
| <a id="__codelineno-102-5" name="__codelineno-102-5" href="#__codelineno-102-5"></a><span class="err">╰──────────┴───────────────────────────────┴───────────────────────────────╯</span> |
| <a id="__codelineno-102-6" name="__codelineno-102-6" href="#__codelineno-102-6"></a> |
| <a id="__codelineno-102-7" name="__codelineno-102-7" href="#__codelineno-102-7"></a><span class="p">(</span><span class="n">No</span> <span class="n">data</span> <span class="n">to</span> <span class="n">display</span><span class="p">:</span> <span class="n">Dataframe</span> <span class="ow">not</span> <span class="n">materialized</span><span class="p">)</span> |
| </code></pre></div> |
| <p>We can execute the Dataframe to preview the first few rows of the query with <code>df.show()</code>.</p> |
| <p>This is correctly optimized to take advantage of Iceberg features such as hidden partitioning and file-level statistics for efficient reads.</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-103-1" name="__codelineno-103-1" href="#__codelineno-103-1"></a><span class="n">df</span><span class="o">.</span><span class="n">show</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span> |
| </code></pre></div> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-104-1" name="__codelineno-104-1" href="#__codelineno-104-1"></a><span class="err">╭──────────┬───────────────────────────────┬───────────────────────────────╮</span> |
| <a id="__codelineno-104-2" name="__codelineno-104-2" href="#__codelineno-104-2"></a><span class="err">│</span> <span class="n">VendorID</span> <span class="err">┆</span> <span class="n">tpep_pickup_datetime</span> <span class="err">┆</span> <span class="n">tpep_dropoff_datetime</span> <span class="err">│</span> |
| <a id="__codelineno-104-3" name="__codelineno-104-3" href="#__codelineno-104-3"></a><span class="err">│</span> <span class="o">---</span> <span class="err">┆</span> <span class="o">---</span> <span class="err">┆</span> <span class="o">---</span> <span class="err">│</span> |
| <a id="__codelineno-104-4" name="__codelineno-104-4" href="#__codelineno-104-4"></a><span class="err">│</span> <span class="n">Int64</span> <span class="err">┆</span> <span class="n">Timestamp</span><span class="p">(</span><span class="n">Microseconds</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> <span class="err">┆</span> <span class="n">Timestamp</span><span class="p">(</span><span class="n">Microseconds</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span> <span class="err">│</span> |
| <a id="__codelineno-104-5" name="__codelineno-104-5" href="#__codelineno-104-5"></a><span class="err">╞══════════╪═══════════════════════════════╪═══════════════════════════════╡</span> |
| <a id="__codelineno-104-6" name="__codelineno-104-6" href="#__codelineno-104-6"></a><span class="err">│</span> <span class="mi">2</span> <span class="err">┆</span> <span class="mi">2008</span><span class="o">-</span><span class="mi">12</span><span class="o">-</span><span class="mi">31</span><span class="n">T23</span><span class="p">:</span><span class="mi">23</span><span class="p">:</span><span class="mf">50.000000</span> <span class="err">┆</span> <span class="mi">2009</span><span class="o">-</span><span class="mi">01</span><span class="o">-</span><span class="mi">01</span><span class="n">T00</span><span class="p">:</span><span class="mi">34</span><span class="p">:</span><span class="mf">31.000000</span> <span class="err">│</span> |
| <a id="__codelineno-104-7" name="__codelineno-104-7" href="#__codelineno-104-7"></a><span class="err">├╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤</span> |
| <a id="__codelineno-104-8" name="__codelineno-104-8" href="#__codelineno-104-8"></a><span class="err">│</span> <span class="mi">2</span> <span class="err">┆</span> <span class="mi">2008</span><span class="o">-</span><span class="mi">12</span><span class="o">-</span><span class="mi">31</span><span class="n">T23</span><span class="p">:</span><span class="mi">05</span><span class="p">:</span><span class="mf">03.000000</span> <span class="err">┆</span> <span class="mi">2009</span><span class="o">-</span><span class="mi">01</span><span class="o">-</span><span class="mi">01</span><span class="n">T16</span><span class="p">:</span><span class="mi">10</span><span class="p">:</span><span class="mf">18.000000</span> <span class="err">│</span> |
| <a id="__codelineno-104-9" name="__codelineno-104-9" href="#__codelineno-104-9"></a><span class="err">╰──────────┴───────────────────────────────┴───────────────────────────────╯</span> |
| <a id="__codelineno-104-10" name="__codelineno-104-10" href="#__codelineno-104-10"></a> |
| <a id="__codelineno-104-11" name="__codelineno-104-11" href="#__codelineno-104-11"></a><span class="p">(</span><span class="n">Showing</span> <span class="n">first</span> <span class="mi">2</span> <span class="n">rows</span><span class="p">)</span> |
| </code></pre></div> |
| <h3 id="polars">Polars<a class="headerlink" href="#polars" title="Permanent link">¶</a></h3> |
| <p>PyIceberg interfaces closely with Polars DataFrames and LazyFrames, which provide a full lazily optimized query engine interface on top of PyIceberg tables.</p> |
| <!-- prettier-ignore-start --> |
| |
| <div class="admonition note"> |
| <p class="admonition-title">Requirements</p> |
| <p>This requires <a href="../"><code>polars</code> to be installed</a>.</p> |
| </div> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-105-1" name="__codelineno-105-1" href="#__codelineno-105-1"></a><span class="n">pip</span> <span class="n">install</span> <span class="n">pyiceberg</span><span class="p">[</span><span class="s1">'polars'</span><span class="p">]</span> |
| </code></pre></div> |
| <!-- prettier-ignore-end --> |
| |
| <p>PyIceberg data can be analyzed and accessed through Polars using either DataFrame or LazyFrame. |
| If your code utilizes the Apache Iceberg data scanning and retrieval API and then analyzes the resulting DataFrame in Polars, use the <code>table.scan().to_polars()</code> API. |
| If the intent is to utilize Polars' high-performance filtering and retrieval functionalities, use LazyFrame exported from the Iceberg table with the <code>table.to_polars()</code> API.</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-106-1" name="__codelineno-106-1" href="#__codelineno-106-1"></a><span class="c1"># Get LazyFrame</span> |
| <a id="__codelineno-106-2" name="__codelineno-106-2" href="#__codelineno-106-2"></a><span class="n">iceberg_table</span><span class="o">.</span><span class="n">to_polars</span><span class="p">()</span> |
| <a id="__codelineno-106-3" name="__codelineno-106-3" href="#__codelineno-106-3"></a> |
| <a id="__codelineno-106-4" name="__codelineno-106-4" href="#__codelineno-106-4"></a><span class="c1"># Get DataFrame</span> |
| <a id="__codelineno-106-5" name="__codelineno-106-5" href="#__codelineno-106-5"></a><span class="n">iceberg_table</span><span class="o">.</span><span class="n">scan</span><span class="p">()</span><span class="o">.</span><span class="n">to_polars</span><span class="p">()</span> |
| </code></pre></div> |
| <h4 id="working-with-polars-dataframe">Working with Polars DataFrame<a class="headerlink" href="#working-with-polars-dataframe" title="Permanent link">¶</a></h4> |
| <p>PyIceberg makes it easy to filter data from a huge table and pull it into a Polars DataFrame locally. This will only fetch the relevant Parquet files for the query and apply the filter. This will reduce IO and therefore improve performance and reduce cost.</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-107-1" name="__codelineno-107-1" href="#__codelineno-107-1"></a><span class="n">schema</span> <span class="o">=</span> <span class="n">Schema</span><span class="p">(</span> |
| <a id="__codelineno-107-2" name="__codelineno-107-2" href="#__codelineno-107-2"></a> <span class="n">NestedField</span><span class="p">(</span><span class="n">field_id</span><span class="o">=</span><span class="mi">1</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s1">'ticket_id'</span><span class="p">,</span> <span class="n">field_type</span><span class="o">=</span><span class="n">LongType</span><span class="p">(),</span> <span class="n">required</span><span class="o">=</span><span class="kc">True</span><span class="p">),</span> |
| <a id="__codelineno-107-3" name="__codelineno-107-3" href="#__codelineno-107-3"></a> <span class="n">NestedField</span><span class="p">(</span><span class="n">field_id</span><span class="o">=</span><span class="mi">2</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s1">'customer_id'</span><span class="p">,</span> <span class="n">field_type</span><span class="o">=</span><span class="n">LongType</span><span class="p">(),</span> <span class="n">required</span><span class="o">=</span><span class="kc">True</span><span class="p">),</span> |
| <a id="__codelineno-107-4" name="__codelineno-107-4" href="#__codelineno-107-4"></a> <span class="n">NestedField</span><span class="p">(</span><span class="n">field_id</span><span class="o">=</span><span class="mi">3</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s1">'issue'</span><span class="p">,</span> <span class="n">field_type</span><span class="o">=</span><span class="n">StringType</span><span class="p">(),</span> <span class="n">required</span><span class="o">=</span><span class="kc">False</span><span class="p">),</span> |
| <a id="__codelineno-107-5" name="__codelineno-107-5" href="#__codelineno-107-5"></a> <span class="n">NestedField</span><span class="p">(</span><span class="n">field_id</span><span class="o">=</span><span class="mi">4</span><span class="p">,</span> <span class="n">name</span><span class="o">=</span><span class="s1">'created_at'</span><span class="p">,</span> <span class="n">field_type</span><span class="o">=</span><span class="n">TimestampType</span><span class="p">(),</span> <span class="n">required</span><span class="o">=</span><span class="kc">True</span><span class="p">),</span> |
| <a id="__codelineno-107-6" name="__codelineno-107-6" href="#__codelineno-107-6"></a> <span class="n">required</span><span class="o">=</span><span class="kc">True</span> |
| <a id="__codelineno-107-7" name="__codelineno-107-7" href="#__codelineno-107-7"></a><span class="p">)</span> |
| <a id="__codelineno-107-8" name="__codelineno-107-8" href="#__codelineno-107-8"></a> |
| <a id="__codelineno-107-9" name="__codelineno-107-9" href="#__codelineno-107-9"></a><span class="n">iceberg_table</span> <span class="o">=</span> <span class="n">catalog</span><span class="o">.</span><span class="n">create_table</span><span class="p">(</span> |
| <a id="__codelineno-107-10" name="__codelineno-107-10" href="#__codelineno-107-10"></a> <span class="n">identifier</span><span class="o">=</span><span class="s1">'default.product_support_issues'</span><span class="p">,</span> |
| <a id="__codelineno-107-11" name="__codelineno-107-11" href="#__codelineno-107-11"></a> <span class="n">schema</span><span class="o">=</span><span class="n">schema</span> |
| <a id="__codelineno-107-12" name="__codelineno-107-12" href="#__codelineno-107-12"></a><span class="p">)</span> |
| <a id="__codelineno-107-13" name="__codelineno-107-13" href="#__codelineno-107-13"></a> |
| <a id="__codelineno-107-14" name="__codelineno-107-14" href="#__codelineno-107-14"></a><span class="n">pa_table_data</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_pylist</span><span class="p">(</span> |
| <a id="__codelineno-107-15" name="__codelineno-107-15" href="#__codelineno-107-15"></a> <span class="p">[</span> |
| <a id="__codelineno-107-16" name="__codelineno-107-16" href="#__codelineno-107-16"></a> <span class="p">{</span><span class="s1">'ticket_id'</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="s1">'customer_id'</span><span class="p">:</span> <span class="mi">546</span><span class="p">,</span> <span class="s1">'issue'</span><span class="p">:</span> <span class="s1">'User Login issue'</span><span class="p">,</span> <span class="s1">'created_at'</span><span class="p">:</span> <span class="mi">1650020000000000</span><span class="p">},</span> |
| <a id="__codelineno-107-17" name="__codelineno-107-17" href="#__codelineno-107-17"></a> <span class="p">{</span><span class="s1">'ticket_id'</span><span class="p">:</span> <span class="mi">2</span><span class="p">,</span> <span class="s1">'customer_id'</span><span class="p">:</span> <span class="mi">547</span><span class="p">,</span> <span class="s1">'issue'</span><span class="p">:</span> <span class="s1">'Payment not going through'</span><span class="p">,</span> <span class="s1">'created_at'</span><span class="p">:</span> <span class="mi">1650028640000000</span><span class="p">},</span> |
| <a id="__codelineno-107-18" name="__codelineno-107-18" href="#__codelineno-107-18"></a> <span class="p">{</span><span class="s1">'ticket_id'</span><span class="p">:</span> <span class="mi">3</span><span class="p">,</span> <span class="s1">'customer_id'</span><span class="p">:</span> <span class="mi">548</span><span class="p">,</span> <span class="s1">'issue'</span><span class="p">:</span> <span class="s1">'Error on checkout'</span><span class="p">,</span> <span class="s1">'created_at'</span><span class="p">:</span> <span class="mi">1650037280000000</span><span class="p">},</span> |
| <a id="__codelineno-107-19" name="__codelineno-107-19" href="#__codelineno-107-19"></a> <span class="p">{</span><span class="s1">'ticket_id'</span><span class="p">:</span> <span class="mi">4</span><span class="p">,</span> <span class="s1">'customer_id'</span><span class="p">:</span> <span class="mi">549</span><span class="p">,</span> <span class="s1">'issue'</span><span class="p">:</span> <span class="s1">'Unable to reset password'</span><span class="p">,</span> <span class="s1">'created_at'</span><span class="p">:</span> <span class="mi">1650045920000000</span><span class="p">},</span> |
| <a id="__codelineno-107-20" name="__codelineno-107-20" href="#__codelineno-107-20"></a> <span class="p">{</span><span class="s1">'ticket_id'</span><span class="p">:</span> <span class="mi">5</span><span class="p">,</span> <span class="s1">'customer_id'</span><span class="p">:</span> <span class="mi">550</span><span class="p">,</span> <span class="s1">'issue'</span><span class="p">:</span> <span class="s1">'Account locked'</span><span class="p">,</span> <span class="s1">'created_at'</span><span class="p">:</span> <span class="mi">1650054560000000</span><span class="p">},</span> |
| <a id="__codelineno-107-21" name="__codelineno-107-21" href="#__codelineno-107-21"></a> <span class="p">{</span><span class="s1">'ticket_id'</span><span class="p">:</span> <span class="mi">6</span><span class="p">,</span> <span class="s1">'customer_id'</span><span class="p">:</span> <span class="mi">551</span><span class="p">,</span> <span class="s1">'issue'</span><span class="p">:</span> <span class="s1">'Order not received'</span><span class="p">,</span> <span class="s1">'created_at'</span><span class="p">:</span> <span class="mi">1650063200000000</span><span class="p">},</span> |
| <a id="__codelineno-107-22" name="__codelineno-107-22" href="#__codelineno-107-22"></a> <span class="p">{</span><span class="s1">'ticket_id'</span><span class="p">:</span> <span class="mi">7</span><span class="p">,</span> <span class="s1">'customer_id'</span><span class="p">:</span> <span class="mi">552</span><span class="p">,</span> <span class="s1">'issue'</span><span class="p">:</span> <span class="s1">'Refund not processed'</span><span class="p">,</span> <span class="s1">'created_at'</span><span class="p">:</span> <span class="mi">1650071840000000</span><span class="p">},</span> |
| <a id="__codelineno-107-23" name="__codelineno-107-23" href="#__codelineno-107-23"></a> <span class="p">{</span><span class="s1">'ticket_id'</span><span class="p">:</span> <span class="mi">8</span><span class="p">,</span> <span class="s1">'customer_id'</span><span class="p">:</span> <span class="mi">553</span><span class="p">,</span> <span class="s1">'issue'</span><span class="p">:</span> <span class="s1">'Shipping address issue'</span><span class="p">,</span> <span class="s1">'created_at'</span><span class="p">:</span> <span class="mi">1650080480000000</span><span class="p">},</span> |
| <a id="__codelineno-107-24" name="__codelineno-107-24" href="#__codelineno-107-24"></a> <span class="p">{</span><span class="s1">'ticket_id'</span><span class="p">:</span> <span class="mi">9</span><span class="p">,</span> <span class="s1">'customer_id'</span><span class="p">:</span> <span class="mi">554</span><span class="p">,</span> <span class="s1">'issue'</span><span class="p">:</span> <span class="s1">'Product damaged'</span><span class="p">,</span> <span class="s1">'created_at'</span><span class="p">:</span> <span class="mi">1650089120000000</span><span class="p">},</span> |
| <a id="__codelineno-107-25" name="__codelineno-107-25" href="#__codelineno-107-25"></a> <span class="p">{</span><span class="s1">'ticket_id'</span><span class="p">:</span> <span class="mi">10</span><span class="p">,</span> <span class="s1">'customer_id'</span><span class="p">:</span> <span class="mi">555</span><span class="p">,</span> <span class="s1">'issue'</span><span class="p">:</span> <span class="s1">'Unable to apply discount code'</span><span class="p">,</span> <span class="s1">'created_at'</span><span class="p">:</span> <span class="mi">1650097760000000</span><span class="p">},</span> |
| <a id="__codelineno-107-26" name="__codelineno-107-26" href="#__codelineno-107-26"></a> <span class="p">{</span><span class="s1">'ticket_id'</span><span class="p">:</span> <span class="mi">11</span><span class="p">,</span> <span class="s1">'customer_id'</span><span class="p">:</span> <span class="mi">556</span><span class="p">,</span> <span class="s1">'issue'</span><span class="p">:</span> <span class="s1">'Website not loading'</span><span class="p">,</span> <span class="s1">'created_at'</span><span class="p">:</span> <span class="mi">1650106400000000</span><span class="p">},</span> |
| <a id="__codelineno-107-27" name="__codelineno-107-27" href="#__codelineno-107-27"></a> <span class="p">{</span><span class="s1">'ticket_id'</span><span class="p">:</span> <span class="mi">12</span><span class="p">,</span> <span class="s1">'customer_id'</span><span class="p">:</span> <span class="mi">557</span><span class="p">,</span> <span class="s1">'issue'</span><span class="p">:</span> <span class="s1">'Incorrect order received'</span><span class="p">,</span> <span class="s1">'created_at'</span><span class="p">:</span> <span class="mi">1650115040000000</span><span class="p">},</span> |
| <a id="__codelineno-107-28" name="__codelineno-107-28" href="#__codelineno-107-28"></a> <span class="p">{</span><span class="s1">'ticket_id'</span><span class="p">:</span> <span class="mi">13</span><span class="p">,</span> <span class="s1">'customer_id'</span><span class="p">:</span> <span class="mi">558</span><span class="p">,</span> <span class="s1">'issue'</span><span class="p">:</span> <span class="s1">'Unable to track order'</span><span class="p">,</span> <span class="s1">'created_at'</span><span class="p">:</span> <span class="mi">1650123680000000</span><span class="p">},</span> |
| <a id="__codelineno-107-29" name="__codelineno-107-29" href="#__codelineno-107-29"></a> <span class="p">{</span><span class="s1">'ticket_id'</span><span class="p">:</span> <span class="mi">14</span><span class="p">,</span> <span class="s1">'customer_id'</span><span class="p">:</span> <span class="mi">559</span><span class="p">,</span> <span class="s1">'issue'</span><span class="p">:</span> <span class="s1">'Order delayed'</span><span class="p">,</span> <span class="s1">'created_at'</span><span class="p">:</span> <span class="mi">1650132320000000</span><span class="p">},</span> |
| <a id="__codelineno-107-30" name="__codelineno-107-30" href="#__codelineno-107-30"></a> <span class="p">{</span><span class="s1">'ticket_id'</span><span class="p">:</span> <span class="mi">15</span><span class="p">,</span> <span class="s1">'customer_id'</span><span class="p">:</span> <span class="mi">560</span><span class="p">,</span> <span class="s1">'issue'</span><span class="p">:</span> <span class="s1">'Product not as described'</span><span class="p">,</span> <span class="s1">'created_at'</span><span class="p">:</span> <span class="mi">1650140960000000</span><span class="p">},</span> |
| <a id="__codelineno-107-31" name="__codelineno-107-31" href="#__codelineno-107-31"></a> <span class="p">{</span><span class="s1">'ticket_id'</span><span class="p">:</span> <span class="mi">16</span><span class="p">,</span> <span class="s1">'customer_id'</span><span class="p">:</span> <span class="mi">561</span><span class="p">,</span> <span class="s1">'issue'</span><span class="p">:</span> <span class="s1">'Unable to contact support'</span><span class="p">,</span> <span class="s1">'created_at'</span><span class="p">:</span> <span class="mi">1650149600000000</span><span class="p">},</span> |
| <a id="__codelineno-107-32" name="__codelineno-107-32" href="#__codelineno-107-32"></a> <span class="p">{</span><span class="s1">'ticket_id'</span><span class="p">:</span> <span class="mi">17</span><span class="p">,</span> <span class="s1">'customer_id'</span><span class="p">:</span> <span class="mi">562</span><span class="p">,</span> <span class="s1">'issue'</span><span class="p">:</span> <span class="s1">'Duplicate charge'</span><span class="p">,</span> <span class="s1">'created_at'</span><span class="p">:</span> <span class="mi">1650158240000000</span><span class="p">},</span> |
| <a id="__codelineno-107-33" name="__codelineno-107-33" href="#__codelineno-107-33"></a> <span class="p">{</span><span class="s1">'ticket_id'</span><span class="p">:</span> <span class="mi">18</span><span class="p">,</span> <span class="s1">'customer_id'</span><span class="p">:</span> <span class="mi">563</span><span class="p">,</span> <span class="s1">'issue'</span><span class="p">:</span> <span class="s1">'Unable to update profile'</span><span class="p">,</span> <span class="s1">'created_at'</span><span class="p">:</span> <span class="mi">1650166880000000</span><span class="p">},</span> |
| <a id="__codelineno-107-34" name="__codelineno-107-34" href="#__codelineno-107-34"></a> <span class="p">{</span><span class="s1">'ticket_id'</span><span class="p">:</span> <span class="mi">19</span><span class="p">,</span> <span class="s1">'customer_id'</span><span class="p">:</span> <span class="mi">564</span><span class="p">,</span> <span class="s1">'issue'</span><span class="p">:</span> <span class="s1">'App crashing'</span><span class="p">,</span> <span class="s1">'created_at'</span><span class="p">:</span> <span class="mi">1650175520000000</span><span class="p">},</span> |
| <a id="__codelineno-107-35" name="__codelineno-107-35" href="#__codelineno-107-35"></a> <span class="p">{</span><span class="s1">'ticket_id'</span><span class="p">:</span> <span class="mi">20</span><span class="p">,</span> <span class="s1">'customer_id'</span><span class="p">:</span> <span class="mi">565</span><span class="p">,</span> <span class="s1">'issue'</span><span class="p">:</span> <span class="s1">'Unable to download invoice'</span><span class="p">,</span> <span class="s1">'created_at'</span><span class="p">:</span> <span class="mi">1650184160000000</span><span class="p">},</span> |
| <a id="__codelineno-107-36" name="__codelineno-107-36" href="#__codelineno-107-36"></a> <span class="p">{</span><span class="s1">'ticket_id'</span><span class="p">:</span> <span class="mi">21</span><span class="p">,</span> <span class="s1">'customer_id'</span><span class="p">:</span> <span class="mi">566</span><span class="p">,</span> <span class="s1">'issue'</span><span class="p">:</span> <span class="s1">'Incorrect billing amount'</span><span class="p">,</span> <span class="s1">'created_at'</span><span class="p">:</span> <span class="mi">1650192800000000</span><span class="p">},</span> |
| <a id="__codelineno-107-37" name="__codelineno-107-37" href="#__codelineno-107-37"></a> <span class="p">],</span> <span class="n">schema</span><span class="o">=</span><span class="n">iceberg_table</span><span class="o">.</span><span class="n">schema</span><span class="p">()</span><span class="o">.</span><span class="n">as_arrow</span><span class="p">()</span> |
| <a id="__codelineno-107-38" name="__codelineno-107-38" href="#__codelineno-107-38"></a><span class="p">)</span> |
| <a id="__codelineno-107-39" name="__codelineno-107-39" href="#__codelineno-107-39"></a> |
| <a id="__codelineno-107-40" name="__codelineno-107-40" href="#__codelineno-107-40"></a><span class="n">iceberg_table</span><span class="o">.</span><span class="n">append</span><span class="p">(</span> |
| <a id="__codelineno-107-41" name="__codelineno-107-41" href="#__codelineno-107-41"></a> <span class="n">df</span><span class="o">=</span><span class="n">pa_table_data</span> |
| <a id="__codelineno-107-42" name="__codelineno-107-42" href="#__codelineno-107-42"></a><span class="p">)</span> |
| <a id="__codelineno-107-43" name="__codelineno-107-43" href="#__codelineno-107-43"></a> |
| <a id="__codelineno-107-44" name="__codelineno-107-44" href="#__codelineno-107-44"></a><span class="n">iceberg_table</span><span class="o">.</span><span class="n">scan</span><span class="p">(</span> |
| <a id="__codelineno-107-45" name="__codelineno-107-45" href="#__codelineno-107-45"></a> <span class="n">row_filter</span><span class="o">=</span><span class="s2">"ticket_id > 10"</span><span class="p">,</span> |
| <a id="__codelineno-107-46" name="__codelineno-107-46" href="#__codelineno-107-46"></a><span class="p">)</span><span class="o">.</span><span class="n">to_polars</span><span class="p">()</span> |
| </code></pre></div> |
| <p>This will return a Polars DataFrame:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-108-1" name="__codelineno-108-1" href="#__codelineno-108-1"></a><span class="n">shape</span><span class="p">:</span> <span class="p">(</span><span class="mi">11</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span> |
| <a id="__codelineno-108-2" name="__codelineno-108-2" href="#__codelineno-108-2"></a><span class="err">┌───────────┬─────────────┬────────────────────────────┬─────────────────────┐</span> |
| <a id="__codelineno-108-3" name="__codelineno-108-3" href="#__codelineno-108-3"></a><span class="err">│</span> <span class="n">ticket_id</span> <span class="err">┆</span> <span class="n">customer_id</span> <span class="err">┆</span> <span class="n">issue</span> <span class="err">┆</span> <span class="n">created_at</span> <span class="err">│</span> |
| <a id="__codelineno-108-4" name="__codelineno-108-4" href="#__codelineno-108-4"></a><span class="err">│</span> <span class="o">---</span> <span class="err">┆</span> <span class="o">---</span> <span class="err">┆</span> <span class="o">---</span> <span class="err">┆</span> <span class="o">---</span> <span class="err">│</span> |
| <a id="__codelineno-108-5" name="__codelineno-108-5" href="#__codelineno-108-5"></a><span class="err">│</span> <span class="n">i64</span> <span class="err">┆</span> <span class="n">i64</span> <span class="err">┆</span> <span class="nb">str</span> <span class="err">┆</span> <span class="n">datetime</span><span class="p">[</span><span class="n">μs</span><span class="p">]</span> <span class="err">│</span> |
| <a id="__codelineno-108-6" name="__codelineno-108-6" href="#__codelineno-108-6"></a><span class="err">╞═══════════╪═════════════╪════════════════════════════╪═════════════════════╡</span> |
| <a id="__codelineno-108-7" name="__codelineno-108-7" href="#__codelineno-108-7"></a><span class="err">│</span> <span class="mi">11</span> <span class="err">┆</span> <span class="mi">556</span> <span class="err">┆</span> <span class="n">Website</span> <span class="ow">not</span> <span class="n">loading</span> <span class="err">┆</span> <span class="mi">2022</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">16</span> <span class="mi">10</span><span class="p">:</span><span class="mi">53</span><span class="p">:</span><span class="mi">20</span> <span class="err">│</span> |
| <a id="__codelineno-108-8" name="__codelineno-108-8" href="#__codelineno-108-8"></a><span class="err">│</span> <span class="mi">12</span> <span class="err">┆</span> <span class="mi">557</span> <span class="err">┆</span> <span class="n">Incorrect</span> <span class="n">order</span> <span class="n">received</span> <span class="err">┆</span> <span class="mi">2022</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">16</span> <span class="mi">13</span><span class="p">:</span><span class="mi">17</span><span class="p">:</span><span class="mi">20</span> <span class="err">│</span> |
| <a id="__codelineno-108-9" name="__codelineno-108-9" href="#__codelineno-108-9"></a><span class="err">│</span> <span class="mi">13</span> <span class="err">┆</span> <span class="mi">558</span> <span class="err">┆</span> <span class="n">Unable</span> <span class="n">to</span> <span class="n">track</span> <span class="n">order</span> <span class="err">┆</span> <span class="mi">2022</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">16</span> <span class="mi">15</span><span class="p">:</span><span class="mi">41</span><span class="p">:</span><span class="mi">20</span> <span class="err">│</span> |
| <a id="__codelineno-108-10" name="__codelineno-108-10" href="#__codelineno-108-10"></a><span class="err">│</span> <span class="mi">14</span> <span class="err">┆</span> <span class="mi">559</span> <span class="err">┆</span> <span class="n">Order</span> <span class="n">delayed</span> <span class="err">┆</span> <span class="mi">2022</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">16</span> <span class="mi">18</span><span class="p">:</span><span class="mi">05</span><span class="p">:</span><span class="mi">20</span> <span class="err">│</span> |
| <a id="__codelineno-108-11" name="__codelineno-108-11" href="#__codelineno-108-11"></a><span class="err">│</span> <span class="mi">15</span> <span class="err">┆</span> <span class="mi">560</span> <span class="err">┆</span> <span class="n">Product</span> <span class="ow">not</span> <span class="k">as</span> <span class="n">described</span> <span class="err">┆</span> <span class="mi">2022</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">16</span> <span class="mi">20</span><span class="p">:</span><span class="mi">29</span><span class="p">:</span><span class="mi">20</span> <span class="err">│</span> |
| <a id="__codelineno-108-12" name="__codelineno-108-12" href="#__codelineno-108-12"></a><span class="err">│</span> <span class="err">…</span> <span class="err">┆</span> <span class="err">…</span> <span class="err">┆</span> <span class="err">…</span> <span class="err">┆</span> <span class="err">…</span> <span class="err">│</span> |
| <a id="__codelineno-108-13" name="__codelineno-108-13" href="#__codelineno-108-13"></a><span class="err">│</span> <span class="mi">17</span> <span class="err">┆</span> <span class="mi">562</span> <span class="err">┆</span> <span class="n">Duplicate</span> <span class="n">charge</span> <span class="err">┆</span> <span class="mi">2022</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">17</span> <span class="mi">01</span><span class="p">:</span><span class="mi">17</span><span class="p">:</span><span class="mi">20</span> <span class="err">│</span> |
| <a id="__codelineno-108-14" name="__codelineno-108-14" href="#__codelineno-108-14"></a><span class="err">│</span> <span class="mi">18</span> <span class="err">┆</span> <span class="mi">563</span> <span class="err">┆</span> <span class="n">Unable</span> <span class="n">to</span> <span class="n">update</span> <span class="n">profile</span> <span class="err">┆</span> <span class="mi">2022</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">17</span> <span class="mi">03</span><span class="p">:</span><span class="mi">41</span><span class="p">:</span><span class="mi">20</span> <span class="err">│</span> |
| <a id="__codelineno-108-15" name="__codelineno-108-15" href="#__codelineno-108-15"></a><span class="err">│</span> <span class="mi">19</span> <span class="err">┆</span> <span class="mi">564</span> <span class="err">┆</span> <span class="n">App</span> <span class="n">crashing</span> <span class="err">┆</span> <span class="mi">2022</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">17</span> <span class="mi">06</span><span class="p">:</span><span class="mi">05</span><span class="p">:</span><span class="mi">20</span> <span class="err">│</span> |
| <a id="__codelineno-108-16" name="__codelineno-108-16" href="#__codelineno-108-16"></a><span class="err">│</span> <span class="mi">20</span> <span class="err">┆</span> <span class="mi">565</span> <span class="err">┆</span> <span class="n">Unable</span> <span class="n">to</span> <span class="n">download</span> <span class="n">invoice</span> <span class="err">┆</span> <span class="mi">2022</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">17</span> <span class="mi">08</span><span class="p">:</span><span class="mi">29</span><span class="p">:</span><span class="mi">20</span> <span class="err">│</span> |
| <a id="__codelineno-108-17" name="__codelineno-108-17" href="#__codelineno-108-17"></a><span class="err">│</span> <span class="mi">21</span> <span class="err">┆</span> <span class="mi">566</span> <span class="err">┆</span> <span class="n">Incorrect</span> <span class="n">billing</span> <span class="n">amount</span> <span class="err">┆</span> <span class="mi">2022</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">17</span> <span class="mi">10</span><span class="p">:</span><span class="mi">53</span><span class="p">:</span><span class="mi">20</span> <span class="err">│</span> |
| <a id="__codelineno-108-18" name="__codelineno-108-18" href="#__codelineno-108-18"></a><span class="err">└───────────┴─────────────┴────────────────────────────┴─────────────────────┘</span> |
| </code></pre></div> |
| <h4 id="working-with-polars-lazyframe">Working with Polars LazyFrame<a class="headerlink" href="#working-with-polars-lazyframe" title="Permanent link">¶</a></h4> |
| <p>PyIceberg supports creation of a Polars LazyFrame based on an Iceberg Table.</p> |
| <p>Using the above code example:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-109-1" name="__codelineno-109-1" href="#__codelineno-109-1"></a><span class="n">lf</span> <span class="o">=</span> <span class="n">iceberg_table</span><span class="o">.</span><span class="n">to_polars</span><span class="p">()</span><span class="o">.</span><span class="n">filter</span><span class="p">(</span><span class="n">pl</span><span class="o">.</span><span class="n">col</span><span class="p">(</span><span class="s2">"ticket_id"</span><span class="p">)</span> <span class="o">></span> <span class="mi">10</span><span class="p">)</span> |
| <a id="__codelineno-109-2" name="__codelineno-109-2" href="#__codelineno-109-2"></a><span class="nb">print</span><span class="p">(</span><span class="n">lf</span><span class="o">.</span><span class="n">collect</span><span class="p">())</span> |
| </code></pre></div> |
| <p>The above code snippet returns a Polars LazyFrame and defines a filter to be executed by Polars:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-110-1" name="__codelineno-110-1" href="#__codelineno-110-1"></a><span class="n">shape</span><span class="p">:</span> <span class="p">(</span><span class="mi">11</span><span class="p">,</span> <span class="mi">4</span><span class="p">)</span> |
| <a id="__codelineno-110-2" name="__codelineno-110-2" href="#__codelineno-110-2"></a><span class="err">┌───────────┬─────────────┬────────────────────────────┬─────────────────────┐</span> |
| <a id="__codelineno-110-3" name="__codelineno-110-3" href="#__codelineno-110-3"></a><span class="err">│</span> <span class="n">ticket_id</span> <span class="err">┆</span> <span class="n">customer_id</span> <span class="err">┆</span> <span class="n">issue</span> <span class="err">┆</span> <span class="n">created_at</span> <span class="err">│</span> |
| <a id="__codelineno-110-4" name="__codelineno-110-4" href="#__codelineno-110-4"></a><span class="err">│</span> <span class="o">---</span> <span class="err">┆</span> <span class="o">---</span> <span class="err">┆</span> <span class="o">---</span> <span class="err">┆</span> <span class="o">---</span> <span class="err">│</span> |
| <a id="__codelineno-110-5" name="__codelineno-110-5" href="#__codelineno-110-5"></a><span class="err">│</span> <span class="n">i64</span> <span class="err">┆</span> <span class="n">i64</span> <span class="err">┆</span> <span class="nb">str</span> <span class="err">┆</span> <span class="n">datetime</span><span class="p">[</span><span class="n">μs</span><span class="p">]</span> <span class="err">│</span> |
| <a id="__codelineno-110-6" name="__codelineno-110-6" href="#__codelineno-110-6"></a><span class="err">╞═══════════╪═════════════╪════════════════════════════╪═════════════════════╡</span> |
| <a id="__codelineno-110-7" name="__codelineno-110-7" href="#__codelineno-110-7"></a><span class="err">│</span> <span class="mi">11</span> <span class="err">┆</span> <span class="mi">556</span> <span class="err">┆</span> <span class="n">Website</span> <span class="ow">not</span> <span class="n">loading</span> <span class="err">┆</span> <span class="mi">2022</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">16</span> <span class="mi">10</span><span class="p">:</span><span class="mi">53</span><span class="p">:</span><span class="mi">20</span> <span class="err">│</span> |
| <a id="__codelineno-110-8" name="__codelineno-110-8" href="#__codelineno-110-8"></a><span class="err">│</span> <span class="mi">12</span> <span class="err">┆</span> <span class="mi">557</span> <span class="err">┆</span> <span class="n">Incorrect</span> <span class="n">order</span> <span class="n">received</span> <span class="err">┆</span> <span class="mi">2022</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">16</span> <span class="mi">13</span><span class="p">:</span><span class="mi">17</span><span class="p">:</span><span class="mi">20</span> <span class="err">│</span> |
| <a id="__codelineno-110-9" name="__codelineno-110-9" href="#__codelineno-110-9"></a><span class="err">│</span> <span class="mi">13</span> <span class="err">┆</span> <span class="mi">558</span> <span class="err">┆</span> <span class="n">Unable</span> <span class="n">to</span> <span class="n">track</span> <span class="n">order</span> <span class="err">┆</span> <span class="mi">2022</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">16</span> <span class="mi">15</span><span class="p">:</span><span class="mi">41</span><span class="p">:</span><span class="mi">20</span> <span class="err">│</span> |
| <a id="__codelineno-110-10" name="__codelineno-110-10" href="#__codelineno-110-10"></a><span class="err">│</span> <span class="mi">14</span> <span class="err">┆</span> <span class="mi">559</span> <span class="err">┆</span> <span class="n">Order</span> <span class="n">delayed</span> <span class="err">┆</span> <span class="mi">2022</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">16</span> <span class="mi">18</span><span class="p">:</span><span class="mi">05</span><span class="p">:</span><span class="mi">20</span> <span class="err">│</span> |
| <a id="__codelineno-110-11" name="__codelineno-110-11" href="#__codelineno-110-11"></a><span class="err">│</span> <span class="mi">15</span> <span class="err">┆</span> <span class="mi">560</span> <span class="err">┆</span> <span class="n">Product</span> <span class="ow">not</span> <span class="k">as</span> <span class="n">described</span> <span class="err">┆</span> <span class="mi">2022</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">16</span> <span class="mi">20</span><span class="p">:</span><span class="mi">29</span><span class="p">:</span><span class="mi">20</span> <span class="err">│</span> |
| <a id="__codelineno-110-12" name="__codelineno-110-12" href="#__codelineno-110-12"></a><span class="err">│</span> <span class="err">…</span> <span class="err">┆</span> <span class="err">…</span> <span class="err">┆</span> <span class="err">…</span> <span class="err">┆</span> <span class="err">…</span> <span class="err">│</span> |
| <a id="__codelineno-110-13" name="__codelineno-110-13" href="#__codelineno-110-13"></a><span class="err">│</span> <span class="mi">17</span> <span class="err">┆</span> <span class="mi">562</span> <span class="err">┆</span> <span class="n">Duplicate</span> <span class="n">charge</span> <span class="err">┆</span> <span class="mi">2022</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">17</span> <span class="mi">01</span><span class="p">:</span><span class="mi">17</span><span class="p">:</span><span class="mi">20</span> <span class="err">│</span> |
| <a id="__codelineno-110-14" name="__codelineno-110-14" href="#__codelineno-110-14"></a><span class="err">│</span> <span class="mi">18</span> <span class="err">┆</span> <span class="mi">563</span> <span class="err">┆</span> <span class="n">Unable</span> <span class="n">to</span> <span class="n">update</span> <span class="n">profile</span> <span class="err">┆</span> <span class="mi">2022</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">17</span> <span class="mi">03</span><span class="p">:</span><span class="mi">41</span><span class="p">:</span><span class="mi">20</span> <span class="err">│</span> |
| <a id="__codelineno-110-15" name="__codelineno-110-15" href="#__codelineno-110-15"></a><span class="err">│</span> <span class="mi">19</span> <span class="err">┆</span> <span class="mi">564</span> <span class="err">┆</span> <span class="n">App</span> <span class="n">crashing</span> <span class="err">┆</span> <span class="mi">2022</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">17</span> <span class="mi">06</span><span class="p">:</span><span class="mi">05</span><span class="p">:</span><span class="mi">20</span> <span class="err">│</span> |
| <a id="__codelineno-110-16" name="__codelineno-110-16" href="#__codelineno-110-16"></a><span class="err">│</span> <span class="mi">20</span> <span class="err">┆</span> <span class="mi">565</span> <span class="err">┆</span> <span class="n">Unable</span> <span class="n">to</span> <span class="n">download</span> <span class="n">invoice</span> <span class="err">┆</span> <span class="mi">2022</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">17</span> <span class="mi">08</span><span class="p">:</span><span class="mi">29</span><span class="p">:</span><span class="mi">20</span> <span class="err">│</span> |
| <a id="__codelineno-110-17" name="__codelineno-110-17" href="#__codelineno-110-17"></a><span class="err">│</span> <span class="mi">21</span> <span class="err">┆</span> <span class="mi">566</span> <span class="err">┆</span> <span class="n">Incorrect</span> <span class="n">billing</span> <span class="n">amount</span> <span class="err">┆</span> <span class="mi">2022</span><span class="o">-</span><span class="mi">04</span><span class="o">-</span><span class="mi">17</span> <span class="mi">10</span><span class="p">:</span><span class="mi">53</span><span class="p">:</span><span class="mi">20</span> <span class="err">│</span> |
| <a id="__codelineno-110-18" name="__codelineno-110-18" href="#__codelineno-110-18"></a><span class="err">└───────────┴─────────────┴────────────────────────────┴─────────────────────┘</span> |
| </code></pre></div> |
| <h3 id="apache-datafusion">Apache DataFusion<a class="headerlink" href="#apache-datafusion" title="Permanent link">¶</a></h3> |
| <p>PyIceberg integrates with <a href="https://datafusion.apache.org/">Apache DataFusion</a> through the Custom Table Provider interface (<a href="https://datafusion.apache.org/python/user-guide/io/table_provider.html">FFI_TableProvider</a>) exposed through <code>iceberg-rust</code>.</p> |
| <!-- prettier-ignore-start --> |
| |
| <div class="admonition note"> |
| <p class="admonition-title">Requirements</p> |
| <p>This requires <a href="../"><code>datafusion</code> and <code>pyiceberg-core</code> to be installed</a>.</p> |
| </div> |
| <!-- prettier-ignore-end --> |
| |
| <!-- markdownlint-disable MD046 -- Allowing indented multi-line formatting in admonition--> |
| |
| <div class="admonition warning"> |
| <p class="admonition-title">Experimental Feature</p> |
| <p>The DataFusion integration is considered <strong>experimental</strong>.</p> |
| <p>The integration has a few caveats:</p> |
| <ul> |
| <li>Only works with <code>datafusion == 51</code>, which aligns with the version used in <code>pyiceberg-core</code></li> |
| <li>Depends directly on <code>iceberg-rust</code> instead of PyIceberg's implementation</li> |
| <li>Has limited features compared to the full PyIceberg API</li> |
| </ul> |
| <p>The integration will improve as both DataFusion and <code>iceberg-rust</code> mature.</p> |
| </div> |
| <!-- markdownlint-enable MD046 --> |
| |
| <p>PyIceberg tables can be registered directly with DataFusion's <code>SessionContext</code> using the table provider interface.</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-111-1" name="__codelineno-111-1" href="#__codelineno-111-1"></a><span class="kn">from</span><span class="w"> </span><span class="nn">datafusion</span><span class="w"> </span><span class="kn">import</span> <span class="n">SessionContext</span> |
| <a id="__codelineno-111-2" name="__codelineno-111-2" href="#__codelineno-111-2"></a><span class="kn">from</span><span class="w"> </span><span class="nn">pyiceberg.catalog</span><span class="w"> </span><span class="kn">import</span> <span class="n">load_catalog</span> |
| <a id="__codelineno-111-3" name="__codelineno-111-3" href="#__codelineno-111-3"></a><span class="kn">import</span><span class="w"> </span><span class="nn">pyarrow</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">pa</span> |
| <a id="__codelineno-111-4" name="__codelineno-111-4" href="#__codelineno-111-4"></a> |
| <a id="__codelineno-111-5" name="__codelineno-111-5" href="#__codelineno-111-5"></a><span class="c1"># Load catalog and create/load a table</span> |
| <a id="__codelineno-111-6" name="__codelineno-111-6" href="#__codelineno-111-6"></a><span class="n">catalog</span> <span class="o">=</span> <span class="n">load_catalog</span><span class="p">(</span><span class="s2">"catalog"</span><span class="p">,</span> <span class="nb">type</span><span class="o">=</span><span class="s2">"in-memory"</span><span class="p">)</span> |
| <a id="__codelineno-111-7" name="__codelineno-111-7" href="#__codelineno-111-7"></a><span class="n">catalog</span><span class="o">.</span><span class="n">create_namespace_if_not_exists</span><span class="p">(</span><span class="s2">"default"</span><span class="p">)</span> |
| <a id="__codelineno-111-8" name="__codelineno-111-8" href="#__codelineno-111-8"></a> |
| <a id="__codelineno-111-9" name="__codelineno-111-9" href="#__codelineno-111-9"></a><span class="c1"># Create some sample data</span> |
| <a id="__codelineno-111-10" name="__codelineno-111-10" href="#__codelineno-111-10"></a><span class="n">data</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">table</span><span class="p">({</span><span class="s2">"x"</span><span class="p">:</span> <span class="p">[</span><span class="mi">1</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">3</span><span class="p">],</span> <span class="s2">"y"</span><span class="p">:</span> <span class="p">[</span><span class="mi">4</span><span class="p">,</span> <span class="mi">5</span><span class="p">,</span> <span class="mi">6</span><span class="p">]})</span> |
| <a id="__codelineno-111-11" name="__codelineno-111-11" href="#__codelineno-111-11"></a><span class="n">iceberg_table</span> <span class="o">=</span> <span class="n">catalog</span><span class="o">.</span><span class="n">create_table</span><span class="p">(</span><span class="s2">"default.test"</span><span class="p">,</span> <span class="n">schema</span><span class="o">=</span><span class="n">data</span><span class="o">.</span><span class="n">schema</span><span class="p">)</span> |
| <a id="__codelineno-111-12" name="__codelineno-111-12" href="#__codelineno-111-12"></a><span class="n">iceberg_table</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">data</span><span class="p">)</span> |
| <a id="__codelineno-111-13" name="__codelineno-111-13" href="#__codelineno-111-13"></a> |
| <a id="__codelineno-111-14" name="__codelineno-111-14" href="#__codelineno-111-14"></a><span class="c1"># Register the table with DataFusion</span> |
| <a id="__codelineno-111-15" name="__codelineno-111-15" href="#__codelineno-111-15"></a><span class="n">ctx</span> <span class="o">=</span> <span class="n">SessionContext</span><span class="p">()</span> |
| <a id="__codelineno-111-16" name="__codelineno-111-16" href="#__codelineno-111-16"></a><span class="n">ctx</span><span class="o">.</span><span class="n">register_table</span><span class="p">(</span><span class="s2">"test"</span><span class="p">,</span> <span class="n">iceberg_table</span><span class="p">)</span> |
| <a id="__codelineno-111-17" name="__codelineno-111-17" href="#__codelineno-111-17"></a> |
| <a id="__codelineno-111-18" name="__codelineno-111-18" href="#__codelineno-111-18"></a><span class="c1"># Query the table using the DataFusion DataFrame API</span> |
| <a id="__codelineno-111-19" name="__codelineno-111-19" href="#__codelineno-111-19"></a><span class="n">ctx</span><span class="o">.</span><span class="n">table</span><span class="p">(</span><span class="s2">"test"</span><span class="p">)</span><span class="o">.</span><span class="n">show</span><span class="p">()</span> |
| </code></pre></div> |
| <p>This will output:</p> |
| <div class="highlight"><pre><span></span><code><a id="__codelineno-112-1" name="__codelineno-112-1" href="#__codelineno-112-1"></a><span class="n">DataFrame</span><span class="p">()</span> |
| <a id="__codelineno-112-2" name="__codelineno-112-2" href="#__codelineno-112-2"></a><span class="o">+---+---+</span> |
| <a id="__codelineno-112-3" name="__codelineno-112-3" href="#__codelineno-112-3"></a><span class="o">|</span> <span class="n">x</span> <span class="o">|</span> <span class="n">y</span> <span class="o">|</span> |
| <a id="__codelineno-112-4" name="__codelineno-112-4" href="#__codelineno-112-4"></a><span class="o">+---+---+</span> |
| <a id="__codelineno-112-5" name="__codelineno-112-5" href="#__codelineno-112-5"></a><span class="o">|</span> <span class="mi">1</span> <span class="o">|</span> <span class="mi">4</span> <span class="o">|</span> |
| <a id="__codelineno-112-6" name="__codelineno-112-6" href="#__codelineno-112-6"></a><span class="o">|</span> <span class="mi">2</span> <span class="o">|</span> <span class="mi">5</span> <span class="o">|</span> |
| <a id="__codelineno-112-7" name="__codelineno-112-7" href="#__codelineno-112-7"></a><span class="o">|</span> <span class="mi">3</span> <span class="o">|</span> <span class="mi">6</span> <span class="o">|</span> |
| <a id="__codelineno-112-8" name="__codelineno-112-8" href="#__codelineno-112-8"></a><span class="o">+---+---+</span> |
| </code></pre></div> |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| </article> |
| </div> |
| |
| |
| <script>var target=document.getElementById(location.hash.slice(1));target&&target.name&&(target.checked=target.name.startsWith("__tabbed_"))</script> |
| </div> |
| |
| <button type="button" class="md-top md-icon" data-md-component="top" hidden> |
| |
| <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24"><path d="M13 20h-2V8l-5.5 5.5-1.42-1.42L12 4.16l7.92 7.92-1.42 1.42L13 8z"/></svg> |
| Back to top |
| </button> |
| |
| </main> |
| |
| <footer class="md-footer"> |
| |
| <div class="md-footer-meta md-typeset"> |
| <div class="md-footer-meta__inner md-grid"> |
| <div class="md-copyright"> |
| |
| |
| Made with |
| <a href="https://squidfunk.github.io/mkdocs-material/" target="_blank" rel="noopener"> |
| Material for MkDocs |
| </a> |
| |
| </div> |
| |
| </div> |
| </div> |
| </footer> |
| |
| </div> |
| <div class="md-dialog" data-md-component="dialog"> |
| <div class="md-dialog__inner md-typeset"></div> |
| </div> |
| |
| |
| |
| |
| |
| <script id="__config" type="application/json">{"annotate": null, "base": "..", "features": ["navigation.top", "navigation.tracking", "navigation.tabs", "navigation.tabs.sticky", "content.code.copy"], "search": "../assets/javascripts/workers/search.2c215733.min.js", "tags": null, "translations": {"clipboard.copied": "Copied to clipboard", "clipboard.copy": "Copy to clipboard", "search.result.more.one": "1 more on this page", "search.result.more.other": "# more on this page", "search.result.none": "No matching documents", "search.result.one": "1 matching document", "search.result.other": "# matching documents", "search.result.placeholder": "Type to start searching", "search.result.term.missing": "Missing", "select.version": "Select version"}, "version": null}</script> |
| |
| |
| <script src="../assets/javascripts/bundle.79ae519e.min.js"></script> |
| |
| |
| </body> |
| </html> |