blob: 77f7453e901ceea6145c475703e357bf50a0eb8c [file]
<!DOCTYPE html>
<html lang="en" data-content_root="../../../" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>pyarrow.interchange.from_dataframe &#8212; Apache Arrow v24.0.0.dev320</title>
<script data-cfasync="false">
// Copy the "mode" and "theme" values stored in localStorage onto the <html>
// element as data-mode / data-theme; an absent (or empty) entry becomes "".
document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
</script>
<!--
This gives us a CSS class that will be invisible only if JS is disabled
-->
<noscript>
<style>
.pst-js-only { display: none !important; }
</style>
</noscript>
<!-- Loaded before other Sphinx assets -->
<link href="../../../_static/styles/theme.css?digest=7f76b32a3354e82990f2" rel="stylesheet" />
<link href="../../../_static/styles/pydata-sphinx-theme.css?digest=7f76b32a3354e82990f2" rel="stylesheet" />
<link rel="stylesheet" type="text/css" href="../../../_static/pygments.css?v=03e43079" />
<link rel="stylesheet" type="text/css" href="../../../_static/copybutton.css?v=76b2166b" />
<link rel="stylesheet" type="text/css" href="../../../_static/sphinx-design.min.css?v=95c83b7e" />
<link rel="stylesheet" type="text/css" href="../../../_static/theme_overrides.css?v=8dcd28dc" />
<!-- So that users can add custom icons -->
<script defer src="../../../_static/scripts/fontawesome.js?digest=7f76b32a3354e82990f2"></script>
<!-- Pre-loaded scripts that we'll load fully later -->
<link rel="preload" as="script" href="../../../_static/scripts/bootstrap.js?digest=7f76b32a3354e82990f2" />
<link rel="preload" as="script" href="../../../_static/scripts/pydata-sphinx-theme.js?digest=7f76b32a3354e82990f2" />
<script src="../../../_static/documentation_options.js?v=a15c85f4"></script>
<script src="../../../_static/doctools.js?v=fd6eb6e6"></script>
<script src="../../../_static/sphinx_highlight.js?v=6ffebe34"></script>
<script src="../../../_static/clipboard.min.js?v=a7894cd8"></script>
<script src="../../../_static/copybutton.js?v=3bb21c8c"></script>
<script src="../../../_static/design-tabs.js?v=f930bc37"></script>
<script type="module">import mermaid from "https://cdn.jsdelivr.net/npm/mermaid@11.12.1/dist/mermaid.esm.min.mjs";
/**
 * Append two <style> elements to <head>: first the default sizing rules for
 * `pre.mermaid` diagrams, then the rules for the per-diagram container, the
 * fullscreen button, the fullscreen modal and its close button.
 *
 * The CSS text lives in template literals, so every line inside them is part
 * of the runtime string and must stay exactly as written.
 */
const initStyles = () => {
    const defaultCss = `pre.mermaid {
/* Same as .mermaid-container > pre */
display: block;
width: 100%;
}
pre.mermaid > svg {
/* Same as .mermaid-container > pre > svg */
height: 500px;
width: 100%;
max-width: 100% !important;
}`;
    const fullscreenCss = `.mermaid-container {
display: flex;
flex-direction: row;
width: 100%;
}
.mermaid-container > pre {
display: block;
width: 100%;
}
.mermaid-container > pre > svg {
height: 500px;
width: 100%;
max-width: 100% !important;
}
.mermaid-fullscreen-btn {
width: 28px;
height: 28px;
background: rgba(255, 255, 255, 0.95);
border: 1px solid rgba(0, 0, 0, 0.3);
border-radius: 4px;
cursor: pointer;
display: flex;
align-items: center;
justify-content: center;
transition: all 0.2s;
box-shadow: 0 2px 6px rgba(0, 0, 0, 0.2);
font-size: 14px;
line-height: 1;
padding: 0;
color: #333;
}
.mermaid-fullscreen-btn:hover {
opacity: 100% !important;
background: rgba(255, 255, 255, 1);
box-shadow: 0 3px 10px rgba(0, 0, 0, 0.3);
transform: scale(1.1);
}
.mermaid-fullscreen-btn.dark-theme {
background: rgba(50, 50, 50, 0.95);
border: 1px solid rgba(255, 255, 255, 0.3);
color: #e0e0e0;
}
.mermaid-fullscreen-btn.dark-theme:hover {
background: rgba(60, 60, 60, 1);
box-shadow: 0 3px 10px rgba(255, 255, 255, 0.2);
}
.mermaid-fullscreen-modal {
display: none;
position: fixed !important;
top: 0 !important;
left: 0 !important;
width: 95vw;
height: 100vh;
background: rgba(255, 255, 255, 0.98);
z-index: 9999;
padding: 20px;
overflow: auto;
}
.mermaid-fullscreen-modal.dark-theme {
background: rgba(0, 0, 0, 0.98);
}
.mermaid-fullscreen-modal.active {
display: flex;
align-items: center;
justify-content: center;
}
.mermaid-container-fullscreen {
position: relative;
width: 95vw;
height: 90vh;
max-width: 95vw;
max-height: 90vh;
background: white;
border-radius: 8px;
padding: 20px;
box-shadow: 0 10px 40px rgba(0, 0, 0, 0.3);
overflow: auto;
display: flex;
align-items: center;
justify-content: center;
}
.mermaid-container-fullscreen.dark-theme {
background: #1a1a1a;
box-shadow: 0 10px 40px rgba(0, 0, 0, 0.8);
}
.mermaid-container-fullscreen pre.mermaid {
width: 100%;
height: 100%;
display: flex;
align-items: center;
justify-content: center;
}
.mermaid-container-fullscreen .mermaid svg {
height: 100% !important;
width: 100% !important;
cursor: grab;
}
.mermaid-fullscreen-close {
position: fixed !important;
top: 20px !important;
right: 20px !important;
width: 40px;
height: 40px;
background: rgba(255, 255, 255, 0.95);
border: 1px solid rgba(0, 0, 0, 0.2);
border-radius: 50%;
cursor: pointer;
z-index: 10000;
display: flex;
align-items: center;
justify-content: center;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.3);
transition: all 0.2s;
font-size: 24px;
line-height: 1;
color: #333;
}
.mermaid-fullscreen-close:hover {
background: white;
box-shadow: 0 6px 16px rgba(0, 0, 0, 0.4);
transform: scale(1.1);
}
.mermaid-fullscreen-close.dark-theme {
background: rgba(50, 50, 50, 0.95);
border: 1px solid rgba(255, 255, 255, 0.2);
color: #e0e0e0;
}
.mermaid-fullscreen-close.dark-theme:hover {
background: rgba(60, 60, 60, 1);
box-shadow: 0 6px 16px rgba(255, 255, 255, 0.2);
}
.mermaid-fullscreen-modal .mermaid-fullscreen-btn {
display: none !important;
}`;
    // Inject in the same order as before: default rules first, fullscreen rules second.
    [defaultCss, fullscreenCss].forEach((cssText) => {
        const styleEl = document.createElement('style');
        styleEl.textContent = cssText;
        document.head.appendChild(styleEl);
    });
};
/**
 * Decide whether the page is currently shown with a dark theme.
 *
 * Checks, in order:
 *   1. a `dark` class or `data-theme="dark"` on <html> or <body>  -> true
 *   2. a `light` class or `data-theme="light"` on <html> or <body> -> false
 *   3. the `(prefers-color-scheme: dark)` media query              -> true
 *   4. the weighted brightness of <body>'s computed `rgb(...)` background
 *      colour, treated as dark when below 128
 * Falls back to `false` (light) when none of these gives an answer.
 *
 * @returns {boolean} true when a dark theme is detected.
 */
const isDarkTheme = () => {
    // <html> is consulted before <body>, and the class before the attribute.
    const declaresScheme = (scheme) => [document.documentElement, document.body].some(
        (node) => node.classList.contains(scheme) || node.getAttribute('data-theme') === scheme
    );
    if (declaresScheme('dark')) {
        return true;
    }
    if (declaresScheme('light')) {
        return false;
    }

    const prefersDark = window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches;
    if (prefersDark) {
        return true;
    }

    const background = window.getComputedStyle(document.body).backgroundColor;
    const channels = background.match(/rgb\((\d+),\s*(\d+),\s*(\d+)/);
    if (!channels) {
        // No recognisable rgb(...) value: default to the light theme.
        return false;
    }
    const [red, green, blue] = channels.slice(1, 4).map((value) => parseInt(value, 10));
    return (red * 299 + green * 587 + blue * 114) / 1000 < 128;
};
// Theme detected when the module is evaluated; reassigned by the theme-change
// handler defined inside load().
let darkTheme = isDarkTheme();
// Fullscreen overlay element and its inner content holder; both are
// (re)created by runMermaid() and are null until then.
let modal = null;
let modalContent = null;
// [x, y] scroll position; overwritten when a diagram is opened fullscreen and
// passed to window.scrollTo() when the overlay is closed.
let previousScrollOffset = [window.scrollX, window.scrollY];
// Escape-key listener registered on `document` by the most recent
// runMermaid() pass. It is remembered so the next pass can unregister it
// instead of stacking one more listener per re-run.
let fullscreenEscapeHandler = null;

/**
 * Render (or re-render) every `.mermaid` element and attach the fullscreen
 * viewer to each diagram.
 *
 * When `rerun` is true, each diagram's source is stored in its
 * `data-original-code` attribute (or restored from it when the element was
 * already processed) and `mermaid.run()` is awaited. If not every `.mermaid`
 * element carries `data-processed="true"` afterwards, the function schedules
 * `runMermaid(false)` after 200 ms and returns.
 *
 * Once all diagrams are processed, the fullscreen modal is rebuilt, and every
 * diagram that is not yet wrapped gets a `.mermaid-container` wrapper and a
 * fullscreen button. Diagrams that are already wrapped only have their button
 * class refreshed for the current theme.
 *
 * The `"False" === "True"` / `"True" !== "True"` comparisons are constant
 * switches for the d3 zoom and fullscreen features.
 * NOTE(review): they look like build-time substituted flags; confirm against
 * the generator before simplifying them.
 *
 * @param {boolean} rerun - whether to reset the diagrams and call mermaid.run().
 * @returns {Promise<void>}
 */
const runMermaid = async (rerun) => {
    console.log("Running mermaid diagrams, rerun =", rerun);
    let all_mermaids = document.querySelectorAll(".mermaid");
    if (rerun) {
        all_mermaids.forEach((el) => {
            if (!el.hasAttribute("data-original-code")) {
                // First time this element is seen: remember its source.
                el.setAttribute('data-original-code', el.innerHTML);
            }
            if (el.getAttribute("data-processed") === "true") {
                // Already rendered: put the source back so mermaid renders it again.
                el.removeAttribute("data-processed");
                el.innerHTML = el.getAttribute('data-original-code');
            } else {
                // Not rendered yet: the current markup is the source.
                el.setAttribute('data-original-code', el.innerHTML);
            }
        });
        await mermaid.run();
    }
    all_mermaids = document.querySelectorAll(".mermaid");
    const mermaids_processed = document.querySelectorAll(".mermaid[data-processed='true']");
    if ("False" === "True") {
        const mermaids_to_add_zoom = -1 === -1 ? all_mermaids.length : -1;
        if (mermaids_to_add_zoom > 0) {
            var svgs = d3.selectAll("");
            if (all_mermaids.length !== mermaids_processed.length) {
                setTimeout(() => runMermaid(false), 200);
                return;
            } else if (svgs.size() !== mermaids_to_add_zoom) {
                setTimeout(() => runMermaid(false), 200);
                return;
            } else {
                svgs.each(function() {
                    var svg = d3.select(this);
                    svg.html("<g class='wrapper'>" + svg.html() + "</g>");
                    var inner = svg.select("g");
                    var zoom = d3.zoom().on("zoom", function(event) {
                        inner.attr("transform", event.transform);
                    });
                    svg.call(zoom);
                });
            }
        }
    } else if (all_mermaids.length !== mermaids_processed.length) {
        // Wait for mermaid to process all diagrams
        setTimeout(() => runMermaid(false), 200);
        return;
    }

    // Stop here if not adding fullscreen capability
    if ("True" !== "True") return;

    if (modal !== null) {
        // Destroy existing modal
        modal.remove();
        modal = null;
        modalContent = null;
    }
    if (fullscreenEscapeHandler !== null) {
        // Drop the previous pass's Escape listener so handlers do not pile up
        // on `document` every time the diagrams are re-rendered.
        document.removeEventListener('keydown', fullscreenEscapeHandler);
        fullscreenEscapeHandler = null;
    }

    modal = document.createElement('div');
    modal.className = 'mermaid-fullscreen-modal' + (darkTheme ? ' dark-theme' : '');
    modal.setAttribute('role', 'dialog');
    modal.setAttribute('aria-modal', 'true');
    modal.setAttribute('aria-label', 'Fullscreen diagram viewer');
    modal.innerHTML = `
<button class="mermaid-fullscreen-close${darkTheme ? ' dark-theme' : ''}" aria-label="Close fullscreen">✕</button>
<div class="mermaid-container-fullscreen${darkTheme ? ' dark-theme' : ''}"></div>
`;
    document.body.appendChild(modal);
    modalContent = modal.querySelector('.mermaid-container-fullscreen');
    const closeBtn = modal.querySelector('.mermaid-fullscreen-close');

    // Hide the overlay, drop the cloned diagram, re-enable page scrolling and
    // jump back to where the page was scrolled before the overlay opened.
    const closeModal = () => {
        modal.classList.remove('active');
        modalContent.innerHTML = '';
        document.body.style.overflow = '';
        window.scrollTo({left: previousScrollOffset[0], top: previousScrollOffset[1], behavior: 'instant'});
    };
    closeBtn.addEventListener('click', closeModal);
    modal.addEventListener('click', (e) => {
        // Only a click on the backdrop itself closes the overlay.
        if (e.target === modal) closeModal();
    });
    fullscreenEscapeHandler = (e) => {
        if (e.key === 'Escape' && modal.classList.contains('active')) {
            closeModal();
        }
    };
    document.addEventListener('keydown', fullscreenEscapeHandler);

    document.querySelectorAll('.mermaid').forEach((mermaidDiv) => {
        if (mermaidDiv.parentNode.classList.contains('mermaid-container') ||
            mermaidDiv.closest('.mermaid-fullscreen-modal')) {
            // Already processed, adjust button class if needed
            const existingBtn = mermaidDiv.parentNode.querySelector('.mermaid-fullscreen-btn');
            if (existingBtn) {
                existingBtn.className = 'mermaid-fullscreen-btn' + (darkTheme ? ' dark-theme' : '');
            }
            return;
        }
        const container = document.createElement('div');
        container.className = 'mermaid-container';
        mermaidDiv.parentNode.insertBefore(container, mermaidDiv);
        container.appendChild(mermaidDiv);

        const fullscreenBtn = document.createElement('button');
        fullscreenBtn.className = 'mermaid-fullscreen-btn' + (darkTheme ? ' dark-theme' : '');
        fullscreenBtn.setAttribute('aria-label', 'View diagram in fullscreen');
        fullscreenBtn.textContent = '⛶';
        fullscreenBtn.style.opacity = '50%';

        // Calculate dynamic position based on diagram's margin and padding
        const diagramStyle = window.getComputedStyle(mermaidDiv);
        const marginTop = parseFloat(diagramStyle.marginTop) || 0;
        const marginRight = parseFloat(diagramStyle.marginRight) || 0;
        const paddingTop = parseFloat(diagramStyle.paddingTop) || 0;
        const paddingRight = parseFloat(diagramStyle.paddingRight) || 0;
        fullscreenBtn.style.top = `${marginTop + paddingTop + 4}px`;
        fullscreenBtn.style.right = `${marginRight + paddingRight + 4}px`;

        fullscreenBtn.addEventListener('click', () => {
            // Save both scroll coordinates as numbers. `window.scroll` is a
            // method, so storing it made the later scrollTo() `left` invalid.
            previousScrollOffset = [window.scrollX, window.scrollY];
            const clone = mermaidDiv.cloneNode(true);
            modalContent.innerHTML = '';
            modalContent.appendChild(clone);
            const svg = clone.querySelector('svg');
            if (svg) {
                // Let the fullscreen CSS size the diagram instead of the
                // dimensions baked into the rendered SVG.
                svg.removeAttribute('width');
                svg.removeAttribute('height');
                svg.style.width = '100%';
                svg.style.height = 'auto';
                svg.style.maxWidth = '100%';
                svg.style.display = 'block';
                if ("False" === "True") {
                    setTimeout(() => {
                        const g = svg.querySelector('g');
                        if (g) {
                            var svgD3 = d3.select(svg);
                            svgD3.html("<g class='wrapper'>" + svgD3.html() + "</g>");
                            var inner = svgD3.select("g");
                            var zoom = d3.zoom().on("zoom", function(event) {
                                inner.attr("transform", event.transform);
                            });
                            svgD3.call(zoom);
                        }
                    }, 100);
                }
            }
            modal.classList.add('active');
            document.body.style.overflow = 'hidden';
        });
        container.appendChild(fullscreenBtn);
    });
};
/**
 * Window "load" handler: inject the styles, render all diagrams once, then
 * watch <html> and <body> for `class`, `style` or `data-theme` attribute
 * changes and re-render the diagrams whenever the detected theme flips.
 *
 * @returns {Promise<void>}
 */
const load = async () => {
    initStyles();
    await runMermaid(true);

    // Mutation callback: only acts when the detected theme differs from the
    // theme mermaid was last initialised with.
    const handleThemeMutation = async () => {
        const detectedDark = isDarkTheme();
        if (detectedDark === darkTheme) {
            return;
        }
        darkTheme = detectedDark;
        console.log("Theme change detected, re-running mermaid with", darkTheme ? "dark" : "default", "theme");
        const baseConfig = JSON.parse(
            `{"startOnLoad": false}`
        );
        await mermaid.initialize({
            ...baseConfig,
            darkMode: darkTheme,
            theme: darkTheme ? 'dark' : 'default',
        });
        await runMermaid(true);
    };

    // Update theme classes when theme changes
    const themeObserver = new MutationObserver(handleThemeMutation);
    const watchedAttributes = {
        attributes: true,
        attributeFilter: ['class', 'style', 'data-theme'],
    };
    for (const target of [document.documentElement, document.body]) {
        themeObserver.observe(target, watchedAttributes);
    }
};
// Configure mermaid for the theme detected at module evaluation, then defer
// the first render (and the theme observer setup) to the window "load" event.
console.log("Initializing mermaid with", darkTheme ? "dark" : "default", "theme");
mermaid.initialize({
    ...JSON.parse(
        `{"startOnLoad": false}`
    ),
    darkMode: darkTheme,
    theme: darkTheme ? 'dark' : 'default',
});
window.addEventListener("load", load);
window.runMermaid = runMermaid;</script>
<script>DOCUMENTATION_OPTIONS.pagename = '_modules/pyarrow/interchange/from_dataframe';</script>
<script>
// Theme-related settings stored on the global DOCUMENTATION_OPTIONS object
// (presumably defined by documentation_options.js, loaded earlier in <head>;
// verify): theme version, version-switcher JSON URL and match key, and whether
// the version warning banner is shown.
DOCUMENTATION_OPTIONS.theme_version = '0.17.0';
DOCUMENTATION_OPTIONS.theme_switcher_json_url = '/docs/_static/versions.json';
DOCUMENTATION_OPTIONS.theme_switcher_version_match = 'dev/';
DOCUMENTATION_OPTIONS.show_version_warning_banner =
true;
</script>
<script>DOCUMENTATION_OPTIONS.search_as_you_type = false;</script>
<link rel="canonical" href="https://arrow.apache.org/docs/_modules/pyarrow/interchange/from_dataframe.html" />
<link rel="icon" href="../../../_static/favicon.ico"/>
<link rel="index" title="Index" href="../../../genindex.html" />
<link rel="search" title="Search" href="../../../search.html" />
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<meta name="docsearch:language" content="en"/>
<meta name="docsearch:version" content="24.0.0.dev320" />
<script src="../../../_static/searchtools.js"></script>
<script src="../../../_static/language_data.js"></script>
<script src="../../../searchindex.js"></script>
<!-- Matomo -->
<script>
// Global Matomo command queue: reuse window._paq when it already exists,
// otherwise start with an empty array. Commands are pushed as arrays.
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* We explicitly disable cookie tracking to avoid privacy issues */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function() {
// Base URL shared by the tracking endpoint (matomo.php) and the tracker script (matomo.js).
var u="https://analytics.apache.org/";
_paq.push(['setTrackerUrl', u+'matomo.php']);
_paq.push(['setSiteId', '20']);
// Create an async <script> for matomo.js and insert it before the first
// <script> element found in the document.
var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-default-mode="">
<div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
<div id="pst-scroll-pixel-helper"></div>
<button type="button" class="btn rounded-pill" id="pst-back-to-top">
<i class="fa-solid fa-arrow-up"></i>Back to top</button>
<dialog id="pst-search-dialog">
<form class="bd-search d-flex align-items-center"
action="../../../search.html"
method="get">
<i class="fa-solid fa-magnifying-glass"></i>
<input type="search"
class="form-control"
name="q"
placeholder="Search the docs ..."
aria-label="Search the docs ..."
autocomplete="off"
autocorrect="off"
autocapitalize="off"
spellcheck="false"/>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form>
</dialog>
<div class="pst-async-banner-revealer d-none">
<aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
</div>
<header id="pst-header" class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
<div class="bd-header__inner bd-page-width">
<button class="pst-navbar-icon sidebar-toggle primary-toggle" aria-label="Site navigation">
<span class="fa-solid fa-bars"></span>
</button>
<div class=" navbar-header-items__start">
<div class="navbar-item">
<a class="navbar-brand logo" href="../../../index.html">
<img src="../../../_static/arrow.png" class="logo__image only-light" alt="Apache Arrow v24.0.0.dev320 - Home"/>
<img src="../../../_static/arrow-dark.png" class="logo__image only-dark pst-js-only" alt="Apache Arrow v24.0.0.dev320 - Home"/>
</a></div>
</div>
<div class=" navbar-header-items">
<div class="me-auto navbar-header-items__center">
<div class="navbar-item">
<nav>
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item ">
<a class="nav-link nav-internal" href="../../../format/index.html">
Specifications
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-internal" href="../../../developers/index.html">
Development
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-internal" href="../../../implementations.html">
Implementations
</a>
</li>
</ul>
</nav></div>
</div>
<div class="navbar-header-items__end">
<div class="navbar-item navbar-persistent--container">
<button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
</div>
<div class="navbar-item"><div class="kapa-ai-bot">
<script
async
src="https://widget.kapa.ai/kapa-widget.bundle.js"
data-website-id="9db461d5-ac77-4b3f-a5c5-75efa78339d2"
data-project-name="Apache Arrow"
data-project-color="#000000"
data-project-logo="https://arrow.apache.org/img/arrow-logo_chevrons_white-txt_black-bg.png"
data-modal-disclaimer="This is a custom LLM with access to all [Arrow documentation](https://arrow.apache.org/docs/). Please include the language you are using in your question, e.g., Python, C++, Java, R, etc."
data-consent-required="true"
data-user-analytics-cookie-enabled="false"
data-consent-screen-disclaimer="By clicking &quot;I agree, let's chat&quot;, you consent to the use of the AI assistant in accordance with kapa.ai's [Privacy Policy](https://www.kapa.ai/content/privacy-policy). This service uses reCAPTCHA, which requires your consent to Google's [Privacy Policy](https://policies.google.com/privacy) and [Terms of Service](https://policies.google.com/terms). By proceeding, you explicitly agree to both kapa.ai's and Google's privacy policies."
></script>
</div>
</div>
<div class="navbar-item">
<div class="version-switcher__container dropdown pst-js-only">
<button id="pst-version-switcher-button-2"
type="button"
class="version-switcher__button btn btn-sm dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-2"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-2"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-2">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div></div>
<div class="navbar-item">
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i>
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i>
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i>
</button></div>
<div class="navbar-item"><ul class="navbar-icon-links"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://www.linkedin.com/company/apache-arrow/" title="LinkedIn" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-linkedin fa-lg" aria-hidden="true"></i>
<span class="sr-only">LinkedIn</span></a>
</li>
<li class="nav-item">
<a href="https://bsky.app/profile/arrow.apache.org" title="BlueSky" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-bluesky fa-lg" aria-hidden="true"></i>
<span class="sr-only">BlueSky</span></a>
</li>
</ul></div>
</div>
</div>
<div class="navbar-persistent--mobile">
<button class="btn search-button-field search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="fa-solid fa-magnifying-glass"></i>
<span class="search-button__default-text">Search</span>
<span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd class="kbd-shortcut__modifier">K</kbd></span>
</button>
</div>
</div>
</header>
<div class="bd-container">
<div class="bd-container__inner bd-page-width">
<dialog id="pst-primary-sidebar-modal"></dialog>
<div id="pst-primary-sidebar" class="bd-sidebar-primary bd-sidebar hide-on-wide">
<div class="sidebar-header-items sidebar-primary__section">
<div class="sidebar-header-items__center">
<div class="navbar-item">
<nav>
<ul class="bd-navbar-elements navbar-nav">
<li class="nav-item ">
<a class="nav-link nav-internal" href="../../../format/index.html">
Specifications
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-internal" href="../../../developers/index.html">
Development
</a>
</li>
<li class="nav-item ">
<a class="nav-link nav-internal" href="../../../implementations.html">
Implementations
</a>
</li>
</ul>
</nav></div>
</div>
<div class="sidebar-header-items__end">
<div class="navbar-item"><div class="kapa-ai-bot">
<script
async
src="https://widget.kapa.ai/kapa-widget.bundle.js"
data-website-id="9db461d5-ac77-4b3f-a5c5-75efa78339d2"
data-project-name="Apache Arrow"
data-project-color="#000000"
data-project-logo="https://arrow.apache.org/img/arrow-logo_chevrons_white-txt_black-bg.png"
data-modal-disclaimer="This is a custom LLM with access to all [Arrow documentation](https://arrow.apache.org/docs/). Please include the language you are using in your question, e.g., Python, C++, Java, R, etc."
data-consent-required="true"
data-user-analytics-cookie-enabled="false"
data-consent-screen-disclaimer="By clicking &quot;I agree, let's chat&quot;, you consent to the use of the AI assistant in accordance with kapa.ai's [Privacy Policy](https://www.kapa.ai/content/privacy-policy). This service uses reCAPTCHA, which requires your consent to Google's [Privacy Policy](https://policies.google.com/privacy) and [Terms of Service](https://policies.google.com/terms). By proceeding, you explicitly agree to both kapa.ai's and Google's privacy policies."
></script>
</div>
</div>
<div class="navbar-item">
<div class="version-switcher__container dropdown pst-js-only">
<button id="pst-version-switcher-button-3"
type="button"
class="version-switcher__button btn btn-sm dropdown-toggle"
data-bs-toggle="dropdown"
aria-haspopup="listbox"
aria-controls="pst-version-switcher-list-3"
aria-label="Version switcher list"
>
Choose version <!-- this text may get changed later by javascript -->
<span class="caret"></span>
</button>
<div id="pst-version-switcher-list-3"
class="version-switcher__menu dropdown-menu list-group-flush py-0"
role="listbox" aria-labelledby="pst-version-switcher-button-3">
<!-- dropdown will be populated by javascript on page load -->
</div>
</div></div>
<div class="navbar-item">
<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode" data-bs-placement="bottom" data-bs-toggle="tooltip">
<i class="theme-switch fa-solid fa-sun fa-lg" data-mode="light" title="Light"></i>
<i class="theme-switch fa-solid fa-moon fa-lg" data-mode="dark" title="Dark"></i>
<i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto" title="System Settings"></i>
</button></div>
<div class="navbar-item"><ul class="navbar-icon-links"
aria-label="Icon Links">
<li class="nav-item">
<a href="https://github.com/apache/arrow" title="GitHub" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i>
<span class="sr-only">GitHub</span></a>
</li>
<li class="nav-item">
<a href="https://www.linkedin.com/company/apache-arrow/" title="LinkedIn" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-linkedin fa-lg" aria-hidden="true"></i>
<span class="sr-only">LinkedIn</span></a>
</li>
<li class="nav-item">
<a href="https://bsky.app/profile/arrow.apache.org" title="BlueSky" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-bluesky fa-lg" aria-hidden="true"></i>
<span class="sr-only">BlueSky</span></a>
</li>
</ul></div>
</div>
</div>
<div class="sidebar-primary-items__end sidebar-primary__section">
<div class="sidebar-primary-item">
<div id="ethical-ad-placement"
class="flat"
data-ea-publisher="readthedocs"
data-ea-type="readthedocs-sidebar"
data-ea-manual="true">
</div></div>
</div>
</div>
<main id="main-content" class="bd-main" role="main">
<div class="bd-content">
<div class="bd-article-container">
<div class="bd-header-article d-print-none">
<div class="header-article-items header-article__inner">
<div class="header-article-items__start">
<div class="header-article-item">
<nav aria-label="Breadcrumb" class="d-print-none">
<ul class="bd-breadcrumbs">
<li class="breadcrumb-item breadcrumb-home">
<a href="../../../index.html" class="nav-link" aria-label="Home">
<i class="fa-solid fa-home"></i>
</a>
</li>
<li class="breadcrumb-item"><a href="../../index.html" class="nav-link">Module code</a></li>
<li class="breadcrumb-item"><a href="../../pyarrow.html" class="nav-link">pyarrow</a></li>
<li class="breadcrumb-item active" aria-current="page"><span class="ellipsis">pyarrow.interchange.from_dataframe</span></li>
</ul>
</nav>
</div>
</div>
</div>
</div>
<div id="searchbox"></div>
<article class="bd-article">
<h1>Source code for pyarrow.interchange.from_dataframe</h1><div class="highlight"><pre>
<span></span><span class="c1"># Licensed to the Apache Software Foundation (ASF) under one</span>
<span class="c1"># or more contributor license agreements. See the NOTICE file</span>
<span class="c1"># distributed with this work for additional information</span>
<span class="c1"># regarding copyright ownership. The ASF licenses this file</span>
<span class="c1"># to you under the Apache License, Version 2.0 (the</span>
<span class="c1"># &quot;License&quot;); you may not use this file except in compliance</span>
<span class="c1"># with the License. You may obtain a copy of the License at</span>
<span class="c1">#</span>
<span class="c1"># http://www.apache.org/licenses/LICENSE-2.0</span>
<span class="c1">#</span>
<span class="c1"># Unless required by applicable law or agreed to in writing,</span>
<span class="c1"># software distributed under the License is distributed on an</span>
<span class="c1"># &quot;AS IS&quot; BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY</span>
<span class="c1"># KIND, either express or implied. See the License for the</span>
<span class="c1"># specific language governing permissions and limitations</span>
<span class="c1"># under the License.</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">__future__</span><span class="w"> </span><span class="kn">import</span> <span class="n">annotations</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">typing</span><span class="w"> </span><span class="kn">import</span> <span class="p">(</span>
<span class="n">Any</span><span class="p">,</span>
<span class="n">Tuple</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">pyarrow.interchange.column</span><span class="w"> </span><span class="kn">import</span> <span class="p">(</span>
<span class="n">DtypeKind</span><span class="p">,</span>
<span class="n">ColumnBuffers</span><span class="p">,</span>
<span class="n">ColumnNullType</span><span class="p">,</span>
<span class="p">)</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">pyarrow</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">pa</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">re</span>
<span class="kn">import</span><span class="w"> </span><span class="nn">pyarrow.compute</span><span class="w"> </span><span class="k">as</span><span class="w"> </span><span class="nn">pc</span>
<span class="kn">from</span><span class="w"> </span><span class="nn">pyarrow.interchange.column</span><span class="w"> </span><span class="kn">import</span> <span class="n">Dtype</span>
<span class="c1"># A typing protocol could be added later to let Mypy validate code using</span>
<span class="c1"># `from_dataframe` better.</span>
<span class="n">DataFrameObject</span> <span class="o">=</span> <span class="n">Any</span>
<span class="n">ColumnObject</span> <span class="o">=</span> <span class="n">Any</span>
<span class="n">BufferObject</span> <span class="o">=</span> <span class="n">Any</span>
<span class="n">_PYARROW_DTYPES</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="n">DtypeKind</span><span class="p">,</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">int</span><span class="p">,</span> <span class="n">Any</span><span class="p">]]</span> <span class="o">=</span> <span class="p">{</span>
<span class="n">DtypeKind</span><span class="o">.</span><span class="n">INT</span><span class="p">:</span> <span class="p">{</span><span class="mi">8</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">int8</span><span class="p">(),</span>
<span class="mi">16</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">int16</span><span class="p">(),</span>
<span class="mi">32</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">int32</span><span class="p">(),</span>
<span class="mi">64</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">int64</span><span class="p">()},</span>
<span class="n">DtypeKind</span><span class="o">.</span><span class="n">UINT</span><span class="p">:</span> <span class="p">{</span><span class="mi">8</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">uint8</span><span class="p">(),</span>
<span class="mi">16</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">uint16</span><span class="p">(),</span>
<span class="mi">32</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">uint32</span><span class="p">(),</span>
<span class="mi">64</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">uint64</span><span class="p">()},</span>
<span class="n">DtypeKind</span><span class="o">.</span><span class="n">FLOAT</span><span class="p">:</span> <span class="p">{</span><span class="mi">16</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">float16</span><span class="p">(),</span>
<span class="mi">32</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">float32</span><span class="p">(),</span>
<span class="mi">64</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">float64</span><span class="p">()},</span>
<span class="n">DtypeKind</span><span class="o">.</span><span class="n">BOOL</span><span class="p">:</span> <span class="p">{</span><span class="mi">1</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">bool_</span><span class="p">(),</span>
<span class="mi">8</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">uint8</span><span class="p">()},</span>
<span class="n">DtypeKind</span><span class="o">.</span><span class="n">STRING</span><span class="p">:</span> <span class="p">{</span><span class="mi">8</span><span class="p">:</span> <span class="n">pa</span><span class="o">.</span><span class="n">string</span><span class="p">()},</span>
<span class="p">}</span>
<div class="viewcode-block" id="from_dataframe">
<a class="viewcode-back" href="../../../python/generated/pyarrow.interchange.from_dataframe.html#pyarrow.interchange.from_dataframe">[docs]</a>
<span class="k">def</span><span class="w"> </span><span class="nf">from_dataframe</span><span class="p">(</span><span class="n">df</span><span class="p">:</span> <span class="n">DataFrameObject</span><span class="p">,</span> <span class="n">allow_copy</span><span class="o">=</span><span class="kc">True</span><span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Build a ``pa.Table`` from any DataFrame supporting the interchange protocol.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> df : DataFrameObject</span>
<span class="sd"> Object supporting the interchange protocol, i.e. `__dataframe__`</span>
<span class="sd"> method.</span>
<span class="sd"> allow_copy : bool, default: True</span>
<span class="sd"> Whether to allow copying the memory to perform the conversion</span>
<span class="sd"> (if false then zero-copy approach is requested).</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> pa.Table</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="p">):</span>
<span class="k">return</span> <span class="n">df</span>
<span class="k">elif</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="p">):</span>
<span class="k">return</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_batches</span><span class="p">([</span><span class="n">df</span><span class="p">])</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">hasattr</span><span class="p">(</span><span class="n">df</span><span class="p">,</span> <span class="s2">&quot;__dataframe__&quot;</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="s2">&quot;`df` does not support __dataframe__&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">_from_dataframe</span><span class="p">(</span><span class="n">df</span><span class="o">.</span><span class="n">__dataframe__</span><span class="p">(</span><span class="n">allow_copy</span><span class="o">=</span><span class="n">allow_copy</span><span class="p">),</span>
<span class="n">allow_copy</span><span class="o">=</span><span class="n">allow_copy</span><span class="p">)</span></div>
<span class="k">def</span><span class="w"> </span><span class="nf">_from_dataframe</span><span class="p">(</span><span class="n">df</span><span class="p">:</span> <span class="n">DataFrameObject</span><span class="p">,</span> <span class="n">allow_copy</span><span class="o">=</span><span class="kc">True</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Build a ``pa.Table`` from the DataFrame interchange object.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> df : DataFrameObject</span>
<span class="sd"> Interchange protocol dataframe object, i.e. the object returned by</span>
<span class="sd"> the `__dataframe__` method.</span>
<span class="sd"> allow_copy : bool, default: True</span>
<span class="sd"> Whether to allow copying the memory to perform the conversion</span>
<span class="sd"> (if false then zero-copy approach is requested).</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> pa.Table</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">batches</span> <span class="o">=</span> <span class="p">[]</span>
<span class="k">for</span> <span class="n">chunk</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">get_chunks</span><span class="p">():</span>
<span class="n">batch</span> <span class="o">=</span> <span class="n">protocol_df_chunk_to_pyarrow</span><span class="p">(</span><span class="n">chunk</span><span class="p">,</span> <span class="n">allow_copy</span><span class="p">)</span>
<span class="n">batches</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">batch</span><span class="p">)</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">batches</span><span class="p">:</span>
<span class="n">batch</span> <span class="o">=</span> <span class="n">protocol_df_chunk_to_pyarrow</span><span class="p">(</span><span class="n">df</span><span class="p">)</span>
<span class="n">batches</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">batch</span><span class="p">)</span>
<span class="k">return</span> <span class="n">pa</span><span class="o">.</span><span class="n">Table</span><span class="o">.</span><span class="n">from_batches</span><span class="p">(</span><span class="n">batches</span><span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="nf">protocol_df_chunk_to_pyarrow</span><span class="p">(</span>
<span class="n">df</span><span class="p">:</span> <span class="n">DataFrameObject</span><span class="p">,</span>
<span class="n">allow_copy</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Convert interchange protocol chunk to ``pa.RecordBatch``.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> df : DataFrameObject</span>
<span class="sd"> Interchange protocol dataframe object or one of its chunks, i.e. an</span>
<span class="sd"> object returned by `__dataframe__` or yielded by its `get_chunks` method.</span>
<span class="sd"> allow_copy : bool, default: True</span>
<span class="sd"> Whether to allow copying the memory to perform the conversion</span>
<span class="sd"> (if false then zero-copy approach is requested).</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> pa.RecordBatch</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="c1"># We need a dict of columns here, with each column being a pa.Array</span>
<span class="n">columns</span><span class="p">:</span> <span class="nb">dict</span><span class="p">[</span><span class="nb">str</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">Array</span><span class="p">]</span> <span class="o">=</span> <span class="p">{}</span>
<span class="k">for</span> <span class="n">name</span> <span class="ow">in</span> <span class="n">df</span><span class="o">.</span><span class="n">column_names</span><span class="p">():</span>
<span class="k">if</span> <span class="ow">not</span> <span class="nb">isinstance</span><span class="p">(</span><span class="n">name</span><span class="p">,</span> <span class="nb">str</span><span class="p">):</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Column </span><span class="si">{</span><span class="n">name</span><span class="si">}</span><span class="s2"> is not a string&quot;</span><span class="p">)</span>
<span class="k">if</span> <span class="n">name</span> <span class="ow">in</span> <span class="n">columns</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">ValueError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Column </span><span class="si">{</span><span class="n">name</span><span class="si">}</span><span class="s2"> is not unique&quot;</span><span class="p">)</span>
<span class="n">col</span> <span class="o">=</span> <span class="n">df</span><span class="o">.</span><span class="n">get_column_by_name</span><span class="p">(</span><span class="n">name</span><span class="p">)</span>
<span class="n">dtype</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">dtype</span><span class="p">[</span><span class="mi">0</span><span class="p">]</span>
<span class="k">if</span> <span class="n">dtype</span> <span class="ow">in</span> <span class="p">(</span>
<span class="n">DtypeKind</span><span class="o">.</span><span class="n">INT</span><span class="p">,</span>
<span class="n">DtypeKind</span><span class="o">.</span><span class="n">UINT</span><span class="p">,</span>
<span class="n">DtypeKind</span><span class="o">.</span><span class="n">FLOAT</span><span class="p">,</span>
<span class="n">DtypeKind</span><span class="o">.</span><span class="n">STRING</span><span class="p">,</span>
<span class="n">DtypeKind</span><span class="o">.</span><span class="n">DATETIME</span><span class="p">,</span>
<span class="p">):</span>
<span class="n">columns</span><span class="p">[</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="n">column_to_array</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">allow_copy</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">dtype</span> <span class="o">==</span> <span class="n">DtypeKind</span><span class="o">.</span><span class="n">BOOL</span><span class="p">:</span>
<span class="n">columns</span><span class="p">[</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="n">bool_column_to_array</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">allow_copy</span><span class="p">)</span>
<span class="k">elif</span> <span class="n">dtype</span> <span class="o">==</span> <span class="n">DtypeKind</span><span class="o">.</span><span class="n">CATEGORICAL</span><span class="p">:</span>
<span class="n">columns</span><span class="p">[</span><span class="n">name</span><span class="p">]</span> <span class="o">=</span> <span class="n">categorical_column_to_dictionary</span><span class="p">(</span><span class="n">col</span><span class="p">,</span> <span class="n">allow_copy</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;Data type </span><span class="si">{</span><span class="n">dtype</span><span class="si">}</span><span class="s2"> not handled yet&quot;</span><span class="p">)</span>
<span class="k">return</span> <span class="n">pa</span><span class="o">.</span><span class="n">RecordBatch</span><span class="o">.</span><span class="n">from_pydict</span><span class="p">(</span><span class="n">columns</span><span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="nf">column_to_array</span><span class="p">(</span>
<span class="n">col</span><span class="p">:</span> <span class="n">ColumnObject</span><span class="p">,</span>
<span class="n">allow_copy</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pa</span><span class="o">.</span><span class="n">Array</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Convert a column holding one of the primitive dtypes to a PyArrow array.</span>
<span class="sd"> A primitive type is one of: int, uint, float, bool (1 bit). String and</span>
<span class="sd"> datetime columns are also converted through this function.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> col : ColumnObject</span>
<span class="sd"> allow_copy : bool, default: True</span>
<span class="sd"> Whether to allow copying the memory to perform the conversion</span>
<span class="sd"> (if false then zero-copy approach is requested).</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> pa.Array</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">buffers</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">get_buffers</span><span class="p">()</span>
<span class="n">data_type</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">dtype</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">buffers_to_array</span><span class="p">(</span><span class="n">buffers</span><span class="p">,</span> <span class="n">data_type</span><span class="p">,</span>
<span class="n">col</span><span class="o">.</span><span class="n">size</span><span class="p">(),</span>
<span class="n">col</span><span class="o">.</span><span class="n">describe_null</span><span class="p">,</span>
<span class="n">col</span><span class="o">.</span><span class="n">offset</span><span class="p">,</span>
<span class="n">allow_copy</span><span class="p">)</span>
<span class="k">return</span> <span class="n">data</span>
<span class="k">def</span><span class="w"> </span><span class="nf">bool_column_to_array</span><span class="p">(</span>
<span class="n">col</span><span class="p">:</span> <span class="n">ColumnObject</span><span class="p">,</span>
<span class="n">allow_copy</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pa</span><span class="o">.</span><span class="n">Array</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Convert a column holding boolean dtype to a PyArrow array.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> col : ColumnObject</span>
<span class="sd"> allow_copy : bool, default: True</span>
<span class="sd"> Whether to allow copying the memory to perform the conversion</span>
<span class="sd"> (if false then zero-copy approach is requested).</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> pa.Array</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">buffers</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">get_buffers</span><span class="p">()</span>
<span class="n">size</span> <span class="o">=</span> <span class="n">buffers</span><span class="p">[</span><span class="s2">&quot;data&quot;</span><span class="p">][</span><span class="mi">1</span><span class="p">][</span><span class="mi">1</span><span class="p">]</span>
<span class="c1"># If booleans are byte-packed a copy to bit-packed will be made</span>
<span class="k">if</span> <span class="n">size</span> <span class="o">==</span> <span class="mi">8</span> <span class="ow">and</span> <span class="ow">not</span> <span class="n">allow_copy</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
<span class="s2">&quot;Boolean column will be casted from uint8 and a copy &quot;</span>
<span class="s2">&quot;is required which is forbidden by allow_copy=False&quot;</span>
<span class="p">)</span>
<span class="n">data_type</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">dtype</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">buffers_to_array</span><span class="p">(</span><span class="n">buffers</span><span class="p">,</span> <span class="n">data_type</span><span class="p">,</span>
<span class="n">col</span><span class="o">.</span><span class="n">size</span><span class="p">(),</span>
<span class="n">col</span><span class="o">.</span><span class="n">describe_null</span><span class="p">,</span>
<span class="n">col</span><span class="o">.</span><span class="n">offset</span><span class="p">)</span>
<span class="k">if</span> <span class="n">size</span> <span class="o">==</span> <span class="mi">8</span><span class="p">:</span>
<span class="n">data</span> <span class="o">=</span> <span class="n">pc</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">data</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">bool_</span><span class="p">())</span>
<span class="k">return</span> <span class="n">data</span>
<span class="k">def</span><span class="w"> </span><span class="nf">categorical_column_to_dictionary</span><span class="p">(</span>
<span class="n">col</span><span class="p">:</span> <span class="n">ColumnObject</span><span class="p">,</span>
<span class="n">allow_copy</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pa</span><span class="o">.</span><span class="n">DictionaryArray</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Convert a column holding categorical data to a pa.DictionaryArray.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> col : ColumnObject</span>
<span class="sd"> allow_copy : bool, default: True</span>
<span class="sd"> Whether to allow copying the memory to perform the conversion</span>
<span class="sd"> (if false then zero-copy approach is requested).</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> pa.DictionaryArray</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">allow_copy</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
<span class="s2">&quot;Categorical column will be casted from uint8 and a copy &quot;</span>
<span class="s2">&quot;is required which is forbidden by allow_copy=False&quot;</span>
<span class="p">)</span>
<span class="n">categorical</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">describe_categorical</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">categorical</span><span class="p">[</span><span class="s2">&quot;is_dictionary&quot;</span><span class="p">]:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span>
<span class="s2">&quot;Non-dictionary categoricals not supported yet&quot;</span><span class="p">)</span>
<span class="c1"># We need to first convert the dictionary column</span>
<span class="n">cat_column</span> <span class="o">=</span> <span class="n">categorical</span><span class="p">[</span><span class="s2">&quot;categories&quot;</span><span class="p">]</span>
<span class="n">dictionary</span> <span class="o">=</span> <span class="n">column_to_array</span><span class="p">(</span><span class="n">cat_column</span><span class="p">)</span>
<span class="c1"># Then we need to convert the indices</span>
<span class="c1"># Here we need to use the buffer data type!</span>
<span class="n">buffers</span> <span class="o">=</span> <span class="n">col</span><span class="o">.</span><span class="n">get_buffers</span><span class="p">()</span>
<span class="n">_</span><span class="p">,</span> <span class="n">data_type</span> <span class="o">=</span> <span class="n">buffers</span><span class="p">[</span><span class="s2">&quot;data&quot;</span><span class="p">]</span>
<span class="n">indices</span> <span class="o">=</span> <span class="n">buffers_to_array</span><span class="p">(</span><span class="n">buffers</span><span class="p">,</span> <span class="n">data_type</span><span class="p">,</span>
<span class="n">col</span><span class="o">.</span><span class="n">size</span><span class="p">(),</span>
<span class="n">col</span><span class="o">.</span><span class="n">describe_null</span><span class="p">,</span>
<span class="n">col</span><span class="o">.</span><span class="n">offset</span><span class="p">)</span>
<span class="c1"># Constructing a pa.DictionaryArray</span>
<span class="n">dict_array</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">DictionaryArray</span><span class="o">.</span><span class="n">from_arrays</span><span class="p">(</span><span class="n">indices</span><span class="p">,</span> <span class="n">dictionary</span><span class="p">)</span>
<span class="k">return</span> <span class="n">dict_array</span>
<span class="k">def</span><span class="w"> </span><span class="nf">parse_datetime_format_str</span><span class="p">(</span><span class="n">format_str</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Parse datetime `format_str` to interpret the `data`.&quot;&quot;&quot;</span>
<span class="c1"># timestamp &#39;ts{unit}:tz&#39;</span>
<span class="n">timestamp_meta</span> <span class="o">=</span> <span class="n">re</span><span class="o">.</span><span class="n">match</span><span class="p">(</span><span class="sa">r</span><span class="s2">&quot;ts([smun]):(.*)&quot;</span><span class="p">,</span> <span class="n">format_str</span><span class="p">)</span>
<span class="k">if</span> <span class="n">timestamp_meta</span><span class="p">:</span>
<span class="n">unit</span><span class="p">,</span> <span class="n">tz</span> <span class="o">=</span> <span class="n">timestamp_meta</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">1</span><span class="p">),</span> <span class="n">timestamp_meta</span><span class="o">.</span><span class="n">group</span><span class="p">(</span><span class="mi">2</span><span class="p">)</span>
<span class="k">if</span> <span class="n">unit</span> <span class="o">!=</span> <span class="s2">&quot;s&quot;</span><span class="p">:</span>
<span class="c1"># the format string describes only a first letter of the unit, so</span>
<span class="c1"># add one extra letter to convert the unit to numpy-style:</span>
<span class="c1"># &#39;m&#39; -&gt; &#39;ms&#39;, &#39;u&#39; -&gt; &#39;us&#39;, &#39;n&#39; -&gt; &#39;ns&#39;</span>
<span class="n">unit</span> <span class="o">+=</span> <span class="s2">&quot;s&quot;</span>
<span class="k">return</span> <span class="n">unit</span><span class="p">,</span> <span class="n">tz</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span><span class="sa">f</span><span class="s2">&quot;DateTime kind is not supported: </span><span class="si">{</span><span class="n">format_str</span><span class="si">}</span><span class="s2">&quot;</span><span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="nf">map_date_type</span><span class="p">(</span><span class="n">data_type</span><span class="p">):</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;Map column data type to pyarrow data type.&quot;&quot;&quot;</span>
<span class="n">kind</span><span class="p">,</span> <span class="n">bit_width</span><span class="p">,</span> <span class="n">f_string</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">data_type</span>
<span class="k">if</span> <span class="n">kind</span> <span class="o">==</span> <span class="n">DtypeKind</span><span class="o">.</span><span class="n">DATETIME</span><span class="p">:</span>
<span class="n">unit</span><span class="p">,</span> <span class="n">tz</span> <span class="o">=</span> <span class="n">parse_datetime_format_str</span><span class="p">(</span><span class="n">f_string</span><span class="p">)</span>
<span class="k">return</span> <span class="n">pa</span><span class="o">.</span><span class="n">timestamp</span><span class="p">(</span><span class="n">unit</span><span class="p">,</span> <span class="n">tz</span><span class="o">=</span><span class="n">tz</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">pa_dtype</span> <span class="o">=</span> <span class="n">_PYARROW_DTYPES</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">kind</span><span class="p">,</span> <span class="p">{})</span><span class="o">.</span><span class="n">get</span><span class="p">(</span><span class="n">bit_width</span><span class="p">,</span> <span class="kc">None</span><span class="p">)</span>
<span class="c1"># Error if dtype is not supported</span>
<span class="k">if</span> <span class="n">pa_dtype</span><span class="p">:</span>
<span class="k">return</span> <span class="n">pa_dtype</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;Conversion for </span><span class="si">{</span><span class="n">data_type</span><span class="si">}</span><span class="s2"> is not yet supported.&quot;</span><span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="nf">buffers_to_array</span><span class="p">(</span>
<span class="n">buffers</span><span class="p">:</span> <span class="n">ColumnBuffers</span><span class="p">,</span>
<span class="n">data_type</span><span class="p">:</span> <span class="n">Tuple</span><span class="p">[</span><span class="n">DtypeKind</span><span class="p">,</span> <span class="nb">int</span><span class="p">,</span> <span class="nb">str</span><span class="p">,</span> <span class="nb">str</span><span class="p">],</span>
<span class="n">length</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
<span class="n">describe_null</span><span class="p">:</span> <span class="n">ColumnNullType</span><span class="p">,</span>
<span class="n">offset</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
<span class="n">allow_copy</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pa</span><span class="o">.</span><span class="n">Array</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Build a PyArrow array from the passed buffer.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> buffers : ColumnBuffers</span>
<span class="sd"> Dictionary containing tuples of underlying buffers and</span>
<span class="sd"> their associated dtype.</span>
<span class="sd"> data_type : Tuple[DtypeKind, int, str, str]</span>
<span class="sd"> Dtype description of the column as a tuple ``(kind, bit-width, format string,</span>
<span class="sd"> endianness)``.</span>
<span class="sd"> length : int</span>
<span class="sd"> The number of values in the array.</span>
<span class="sd"> describe_null : ColumnNullType</span>
<span class="sd"> Null representation the column dtype uses,</span>
<span class="sd"> as a tuple ``(kind, value)``</span>
<span class="sd"> offset : int, default: 0</span>
<span class="sd"> Number of elements to offset from the start of the buffer.</span>
<span class="sd"> allow_copy : bool, default: True</span>
<span class="sd"> Whether to allow copying the memory to perform the conversion</span>
<span class="sd"> (if false then zero-copy approach is requested).</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> pa.Array</span>
<span class="sd"> Notes</span>
<span class="sd"> -----</span>
<span class="sd"> The returned array doesn&#39;t own the memory. The caller of this function</span>
<span class="sd"> is responsible for keeping the memory owner object alive as long as</span>
<span class="sd"> the returned PyArrow array is being used.</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">data_buff</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">buffers</span><span class="p">[</span><span class="s2">&quot;data&quot;</span><span class="p">]</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">validity_buff</span><span class="p">,</span> <span class="n">validity_dtype</span> <span class="o">=</span> <span class="n">buffers</span><span class="p">[</span><span class="s2">&quot;validity&quot;</span><span class="p">]</span>
<span class="k">except</span> <span class="ne">TypeError</span><span class="p">:</span>
<span class="n">validity_buff</span> <span class="o">=</span> <span class="kc">None</span>
<span class="k">try</span><span class="p">:</span>
<span class="n">offset_buff</span><span class="p">,</span> <span class="n">offset_dtype</span> <span class="o">=</span> <span class="n">buffers</span><span class="p">[</span><span class="s2">&quot;offsets&quot;</span><span class="p">]</span>
<span class="k">except</span> <span class="ne">TypeError</span><span class="p">:</span>
<span class="n">offset_buff</span> <span class="o">=</span> <span class="kc">None</span>
<span class="c1"># Construct a pyarrow Buffer</span>
<span class="n">data_pa_buffer</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">foreign_buffer</span><span class="p">(</span><span class="n">data_buff</span><span class="o">.</span><span class="n">ptr</span><span class="p">,</span> <span class="n">data_buff</span><span class="o">.</span><span class="n">bufsize</span><span class="p">,</span>
<span class="n">base</span><span class="o">=</span><span class="n">data_buff</span><span class="p">)</span>
<span class="c1"># Construct a validity pyarrow Buffer, if applicable</span>
<span class="k">if</span> <span class="n">validity_buff</span><span class="p">:</span>
<span class="n">validity_pa_buff</span> <span class="o">=</span> <span class="n">validity_buffer_from_mask</span><span class="p">(</span><span class="n">validity_buff</span><span class="p">,</span>
<span class="n">validity_dtype</span><span class="p">,</span>
<span class="n">describe_null</span><span class="p">,</span>
<span class="n">length</span><span class="p">,</span>
<span class="n">offset</span><span class="p">,</span>
<span class="n">allow_copy</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">validity_pa_buff</span> <span class="o">=</span> <span class="n">validity_buffer_nan_sentinel</span><span class="p">(</span><span class="n">data_pa_buffer</span><span class="p">,</span>
<span class="n">data_type</span><span class="p">,</span>
<span class="n">describe_null</span><span class="p">,</span>
<span class="n">length</span><span class="p">,</span>
<span class="n">offset</span><span class="p">,</span>
<span class="n">allow_copy</span><span class="p">)</span>
<span class="c1"># Construct a pyarrow Array from buffers</span>
<span class="n">data_dtype</span> <span class="o">=</span> <span class="n">map_date_type</span><span class="p">(</span><span class="n">data_type</span><span class="p">)</span>
<span class="k">if</span> <span class="n">offset_buff</span><span class="p">:</span>
<span class="n">_</span><span class="p">,</span> <span class="n">offset_bit_width</span><span class="p">,</span> <span class="n">_</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">offset_dtype</span>
<span class="c1"># If an offset buffer exists, construct an offset pyarrow Buffer</span>
<span class="c1"># and add it to the construction of an array</span>
<span class="n">offset_pa_buffer</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">foreign_buffer</span><span class="p">(</span><span class="n">offset_buff</span><span class="o">.</span><span class="n">ptr</span><span class="p">,</span>
<span class="n">offset_buff</span><span class="o">.</span><span class="n">bufsize</span><span class="p">,</span>
<span class="n">base</span><span class="o">=</span><span class="n">offset_buff</span><span class="p">)</span>
<span class="k">if</span> <span class="n">data_type</span><span class="p">[</span><span class="mi">2</span><span class="p">]</span> <span class="o">==</span> <span class="s1">&#39;U&#39;</span><span class="p">:</span>
<span class="n">string_type</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">large_string</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">if</span> <span class="n">offset_bit_width</span> <span class="o">==</span> <span class="mi">64</span><span class="p">:</span>
<span class="n">string_type</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">large_string</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">string_type</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">string</span><span class="p">()</span>
<span class="n">array</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">Array</span><span class="o">.</span><span class="n">from_buffers</span><span class="p">(</span>
<span class="n">string_type</span><span class="p">,</span>
<span class="n">length</span><span class="p">,</span>
<span class="p">[</span><span class="n">validity_pa_buff</span><span class="p">,</span> <span class="n">offset_pa_buffer</span><span class="p">,</span> <span class="n">data_pa_buffer</span><span class="p">],</span>
<span class="n">offset</span><span class="o">=</span><span class="n">offset</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">array</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">Array</span><span class="o">.</span><span class="n">from_buffers</span><span class="p">(</span>
<span class="n">data_dtype</span><span class="p">,</span>
<span class="n">length</span><span class="p">,</span>
<span class="p">[</span><span class="n">validity_pa_buff</span><span class="p">,</span> <span class="n">data_pa_buffer</span><span class="p">],</span>
<span class="n">offset</span><span class="o">=</span><span class="n">offset</span><span class="p">,</span>
<span class="p">)</span>
<span class="k">return</span> <span class="n">array</span>
<span class="k">def</span><span class="w"> </span><span class="nf">validity_buffer_from_mask</span><span class="p">(</span>
<span class="n">validity_buff</span><span class="p">:</span> <span class="n">BufferObject</span><span class="p">,</span>
<span class="n">validity_dtype</span><span class="p">:</span> <span class="n">Dtype</span><span class="p">,</span>
<span class="n">describe_null</span><span class="p">:</span> <span class="n">ColumnNullType</span><span class="p">,</span>
<span class="n">length</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
<span class="n">offset</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
<span class="n">allow_copy</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pa</span><span class="o">.</span><span class="n">Buffer</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Build a PyArrow buffer from the passed mask buffer.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> validity_buff : BufferObject</span>
<span class="sd"> Underlying validity (mask) buffer; its dtype is passed separately.</span>
<span class="sd"> validity_dtype : Dtype</span>
<span class="sd"> Dtype description as a tuple ``(kind, bit-width, format string,</span>
<span class="sd"> endianness)``.</span>
<span class="sd"> describe_null : ColumnNullType</span>
<span class="sd"> Null representation the column dtype uses,</span>
<span class="sd"> as a tuple ``(kind, value)``</span>
<span class="sd"> length : int</span>
<span class="sd"> The number of values in the array.</span>
<span class="sd"> offset : int, default: 0</span>
<span class="sd"> Number of elements to offset from the start of the buffer.</span>
<span class="sd"> allow_copy : bool, default: True</span>
<span class="sd"> Whether to allow copying the memory to perform the conversion</span>
<span class="sd"> (if false then zero-copy approach is requested).</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> pa.Buffer</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">null_kind</span><span class="p">,</span> <span class="n">sentinel_val</span> <span class="o">=</span> <span class="n">describe_null</span>
<span class="n">validity_kind</span><span class="p">,</span> <span class="n">_</span><span class="p">,</span> <span class="n">_</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">validity_dtype</span>
<span class="k">assert</span> <span class="n">validity_kind</span> <span class="o">==</span> <span class="n">DtypeKind</span><span class="o">.</span><span class="n">BOOL</span>
<span class="k">if</span> <span class="n">null_kind</span> <span class="o">==</span> <span class="n">ColumnNullType</span><span class="o">.</span><span class="n">NON_NULLABLE</span><span class="p">:</span>
<span class="c1"># A sliced array can report a NON_NULLABLE ColumnNullType when that</span>
<span class="c1"># slice has no missing values, even though a bitmask exists; in that</span>
<span class="c1"># case the validity buffer must be None.</span>
<span class="k">return</span> <span class="kc">None</span>
<span class="k">elif</span> <span class="n">null_kind</span> <span class="o">==</span> <span class="n">ColumnNullType</span><span class="o">.</span><span class="n">USE_BYTEMASK</span> <span class="ow">or</span> <span class="p">(</span>
<span class="n">null_kind</span> <span class="o">==</span> <span class="n">ColumnNullType</span><span class="o">.</span><span class="n">USE_BITMASK</span> <span class="ow">and</span> <span class="n">sentinel_val</span> <span class="o">==</span> <span class="mi">1</span>
<span class="p">):</span>
<span class="n">buff</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">foreign_buffer</span><span class="p">(</span><span class="n">validity_buff</span><span class="o">.</span><span class="n">ptr</span><span class="p">,</span>
<span class="n">validity_buff</span><span class="o">.</span><span class="n">bufsize</span><span class="p">,</span>
<span class="n">base</span><span class="o">=</span><span class="n">validity_buff</span><span class="p">)</span>
<span class="k">if</span> <span class="n">null_kind</span> <span class="o">==</span> <span class="n">ColumnNullType</span><span class="o">.</span><span class="n">USE_BYTEMASK</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">allow_copy</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
<span class="s2">&quot;To create a bitmask a copy of the data is &quot;</span>
<span class="s2">&quot;required which is forbidden by allow_copy=False&quot;</span>
<span class="p">)</span>
<span class="n">mask</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">Array</span><span class="o">.</span><span class="n">from_buffers</span><span class="p">(</span><span class="n">pa</span><span class="o">.</span><span class="n">int8</span><span class="p">(),</span> <span class="n">length</span><span class="p">,</span>
<span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="n">buff</span><span class="p">],</span>
<span class="n">offset</span><span class="o">=</span><span class="n">offset</span><span class="p">)</span>
<span class="n">mask_bool</span> <span class="o">=</span> <span class="n">pc</span><span class="o">.</span><span class="n">cast</span><span class="p">(</span><span class="n">mask</span><span class="p">,</span> <span class="n">pa</span><span class="o">.</span><span class="n">bool_</span><span class="p">())</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">mask_bool</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">Array</span><span class="o">.</span><span class="n">from_buffers</span><span class="p">(</span><span class="n">pa</span><span class="o">.</span><span class="n">bool_</span><span class="p">(),</span> <span class="n">length</span><span class="p">,</span>
<span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="n">buff</span><span class="p">],</span>
<span class="n">offset</span><span class="o">=</span><span class="n">offset</span><span class="p">)</span>
<span class="k">if</span> <span class="n">sentinel_val</span> <span class="o">==</span> <span class="mi">1</span><span class="p">:</span>
<span class="n">mask_bool</span> <span class="o">=</span> <span class="n">pc</span><span class="o">.</span><span class="n">invert</span><span class="p">(</span><span class="n">mask_bool</span><span class="p">)</span>
<span class="k">return</span> <span class="n">mask_bool</span><span class="o">.</span><span class="n">buffers</span><span class="p">()[</span><span class="mi">1</span><span class="p">]</span>
<span class="k">elif</span> <span class="n">null_kind</span> <span class="o">==</span> <span class="n">ColumnNullType</span><span class="o">.</span><span class="n">USE_BITMASK</span> <span class="ow">and</span> <span class="n">sentinel_val</span> <span class="o">==</span> <span class="mi">0</span><span class="p">:</span>
<span class="k">return</span> <span class="n">pa</span><span class="o">.</span><span class="n">foreign_buffer</span><span class="p">(</span><span class="n">validity_buff</span><span class="o">.</span><span class="n">ptr</span><span class="p">,</span>
<span class="n">validity_buff</span><span class="o">.</span><span class="n">bufsize</span><span class="p">,</span>
<span class="n">base</span><span class="o">=</span><span class="n">validity_buff</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">describe_null</span><span class="si">}</span><span class="s2"> null representation is not yet supported.&quot;</span><span class="p">)</span>
<span class="k">def</span><span class="w"> </span><span class="nf">validity_buffer_nan_sentinel</span><span class="p">(</span>
<span class="n">data_pa_buffer</span><span class="p">:</span> <span class="n">BufferObject</span><span class="p">,</span>
<span class="n">data_type</span><span class="p">:</span> <span class="n">Dtype</span><span class="p">,</span>
<span class="n">describe_null</span><span class="p">:</span> <span class="n">ColumnNullType</span><span class="p">,</span>
<span class="n">length</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
<span class="n">offset</span><span class="p">:</span> <span class="nb">int</span> <span class="o">=</span> <span class="mi">0</span><span class="p">,</span>
<span class="n">allow_copy</span><span class="p">:</span> <span class="nb">bool</span> <span class="o">=</span> <span class="kc">True</span><span class="p">,</span>
<span class="p">)</span> <span class="o">-&gt;</span> <span class="n">pa</span><span class="o">.</span><span class="n">Buffer</span><span class="p">:</span>
<span class="w"> </span><span class="sd">&quot;&quot;&quot;</span>
<span class="sd"> Build a PyArrow validity buffer by detecting NaN or sentinel values in the data.</span>
<span class="sd"> Parameters</span>
<span class="sd"> ----------</span>
<span class="sd"> data_pa_buffer : pa.Buffer</span>
<span class="sd"> PyArrow buffer for the column data.</span>
<span class="sd"> data_type : Dtype</span>
<span class="sd"> Dtype description as a tuple ``(kind, bit-width, format string,</span>
<span class="sd"> endianness)``.</span>
<span class="sd"> describe_null : ColumnNullType</span>
<span class="sd"> Null representation the column dtype uses,</span>
<span class="sd"> as a tuple ``(kind, value)``</span>
<span class="sd"> length : int</span>
<span class="sd"> The number of values in the array.</span>
<span class="sd"> offset : int, default: 0</span>
<span class="sd"> Number of elements to offset from the start of the buffer.</span>
<span class="sd"> allow_copy : bool, default: True</span>
<span class="sd"> Whether to allow copying the memory to perform the conversion</span>
<span class="sd"> (if false then zero-copy approach is requested).</span>
<span class="sd"> Returns</span>
<span class="sd"> -------</span>
<span class="sd"> pa.Buffer</span>
<span class="sd"> &quot;&quot;&quot;</span>
<span class="n">kind</span><span class="p">,</span> <span class="n">bit_width</span><span class="p">,</span> <span class="n">_</span><span class="p">,</span> <span class="n">_</span> <span class="o">=</span> <span class="n">data_type</span>
<span class="n">data_dtype</span> <span class="o">=</span> <span class="n">map_date_type</span><span class="p">(</span><span class="n">data_type</span><span class="p">)</span>
<span class="n">null_kind</span><span class="p">,</span> <span class="n">sentinel_val</span> <span class="o">=</span> <span class="n">describe_null</span>
<span class="c1"># Check for float NaN values</span>
<span class="k">if</span> <span class="n">null_kind</span> <span class="o">==</span> <span class="n">ColumnNullType</span><span class="o">.</span><span class="n">USE_NAN</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">allow_copy</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
<span class="s2">&quot;To create a bitmask a copy of the data is &quot;</span>
<span class="s2">&quot;required which is forbidden by allow_copy=False&quot;</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">kind</span> <span class="o">==</span> <span class="n">DtypeKind</span><span class="o">.</span><span class="n">FLOAT</span> <span class="ow">and</span> <span class="n">bit_width</span> <span class="o">==</span> <span class="mi">16</span><span class="p">:</span>
<span class="c1"># &#39;pyarrow.compute.is_nan&#39; kernel not yet implemented</span>
<span class="c1"># for float16</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">data_type</span><span class="si">}</span><span class="s2"> with </span><span class="si">{</span><span class="n">null_kind</span><span class="si">}</span><span class="s2"> is not yet supported.&quot;</span><span class="p">)</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">pyarrow_data</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">Array</span><span class="o">.</span><span class="n">from_buffers</span><span class="p">(</span>
<span class="n">data_dtype</span><span class="p">,</span>
<span class="n">length</span><span class="p">,</span>
<span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="n">data_pa_buffer</span><span class="p">],</span>
<span class="n">offset</span><span class="o">=</span><span class="n">offset</span><span class="p">,</span>
<span class="p">)</span>
<span class="n">mask</span> <span class="o">=</span> <span class="n">pc</span><span class="o">.</span><span class="n">is_nan</span><span class="p">(</span><span class="n">pyarrow_data</span><span class="p">)</span>
<span class="n">mask</span> <span class="o">=</span> <span class="n">pc</span><span class="o">.</span><span class="n">invert</span><span class="p">(</span><span class="n">mask</span><span class="p">)</span>
<span class="k">return</span> <span class="n">mask</span><span class="o">.</span><span class="n">buffers</span><span class="p">()[</span><span class="mi">1</span><span class="p">]</span>
<span class="c1"># Check for sentinel values</span>
<span class="k">elif</span> <span class="n">null_kind</span> <span class="o">==</span> <span class="n">ColumnNullType</span><span class="o">.</span><span class="n">USE_SENTINEL</span><span class="p">:</span>
<span class="k">if</span> <span class="ow">not</span> <span class="n">allow_copy</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">RuntimeError</span><span class="p">(</span>
<span class="s2">&quot;To create a bitmask a copy of the data is &quot;</span>
<span class="s2">&quot;required which is forbidden by allow_copy=False&quot;</span>
<span class="p">)</span>
<span class="k">if</span> <span class="n">kind</span> <span class="o">==</span> <span class="n">DtypeKind</span><span class="o">.</span><span class="n">DATETIME</span><span class="p">:</span>
<span class="n">sentinel_dtype</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">int64</span><span class="p">()</span>
<span class="k">else</span><span class="p">:</span>
<span class="n">sentinel_dtype</span> <span class="o">=</span> <span class="n">data_dtype</span>
<span class="n">pyarrow_data</span> <span class="o">=</span> <span class="n">pa</span><span class="o">.</span><span class="n">Array</span><span class="o">.</span><span class="n">from_buffers</span><span class="p">(</span><span class="n">sentinel_dtype</span><span class="p">,</span>
<span class="n">length</span><span class="p">,</span>
<span class="p">[</span><span class="kc">None</span><span class="p">,</span> <span class="n">data_pa_buffer</span><span class="p">],</span>
<span class="n">offset</span><span class="o">=</span><span class="n">offset</span><span class="p">)</span>
<span class="n">sentinel_arr</span> <span class="o">=</span> <span class="n">pc</span><span class="o">.</span><span class="n">equal</span><span class="p">(</span><span class="n">pyarrow_data</span><span class="p">,</span> <span class="n">sentinel_val</span><span class="p">)</span>
<span class="n">mask_bool</span> <span class="o">=</span> <span class="n">pc</span><span class="o">.</span><span class="n">invert</span><span class="p">(</span><span class="n">sentinel_arr</span><span class="p">)</span>
<span class="k">return</span> <span class="n">mask_bool</span><span class="o">.</span><span class="n">buffers</span><span class="p">()[</span><span class="mi">1</span><span class="p">]</span>
<span class="k">elif</span> <span class="n">null_kind</span> <span class="o">==</span> <span class="n">ColumnNullType</span><span class="o">.</span><span class="n">NON_NULLABLE</span><span class="p">:</span>
<span class="k">pass</span>
<span class="k">else</span><span class="p">:</span>
<span class="k">raise</span> <span class="ne">NotImplementedError</span><span class="p">(</span>
<span class="sa">f</span><span class="s2">&quot;</span><span class="si">{</span><span class="n">describe_null</span><span class="si">}</span><span class="s2"> null representation is not yet supported.&quot;</span><span class="p">)</span>
</pre></div>
</article>
<footer class="prev-next-footer d-print-none">
<div class="prev-next-area">
</div>
</footer>
</div>
</div>
<footer class="bd-footer-content">
</footer>
</main>
</div>
</div>
<!-- Scripts loaded after <body> so the DOM is not blocked -->
<script defer src="../../../_static/scripts/bootstrap.js?digest=7f76b32a3354e82990f2"></script>
<script defer src="../../../_static/scripts/pydata-sphinx-theme.js?digest=7f76b32a3354e82990f2"></script>
<footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
<div class="footer-items__start">
<div class="footer-item">
<p class="copyright">
© Copyright 2016-2026 Apache Software Foundation.
Apache Arrow, Arrow, Apache, the Apache logo, and the Apache Arrow project logo are either registered trademarks or trademarks of The Apache Software Foundation in the United States and other countries.
<br/>
</p>
</div>
<div class="footer-item">
<p class="sphinx-version">
Created using <a href="https://www.sphinx-doc.org/">Sphinx</a> 9.1.0.
<br/>
</p>
</div>
</div>
<div class="footer-items__end">
<div class="footer-item">
<p class="theme-version">
<!-- # L10n: Setting the PST URL as an argument as this does not need to be localized -->
Built with the <a href="https://pydata-sphinx-theme.readthedocs.io/en/stable/index.html">PyData Sphinx Theme</a> 0.17.0.
</p></div>
</div>
</div>
</footer>
</body>
</html>