blob: ab55775808fa763d7c20ab92996e09959da2a094 [file] [log] [blame]
<!DOCTYPE html>
<!-- Generated by pkgdown: do not edit by hand --><html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="utf-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"><meta name="description" content="A Schema is an Arrow object containing Fields, which map names to
Arrow data types. Create a Schema when you
want to convert an R data.frame to Arrow but don't want to rely on the
default mapping of R types to Arrow types, such as when you want to choose a
specific numeric precision, or when creating a Dataset and you want to
ensure a specific schema rather than inferring it from the various files.
Many Arrow objects, including Table and Dataset, have a $schema method
(active binding) that lets you access their schema."><title>Schema class — Schema • Arrow R Package</title><!-- favicons --><link rel="icon" type="image/png" sizes="16x16" href="../favicon-16x16.png"><link rel="icon" type="image/png" sizes="32x32" href="../favicon-32x32.png"><link rel="apple-touch-icon" type="image/png" sizes="180x180" href="../apple-touch-icon.png"><link rel="apple-touch-icon" type="image/png" sizes="120x120" href="../apple-touch-icon-120x120.png"><link rel="apple-touch-icon" type="image/png" sizes="76x76" href="../apple-touch-icon-76x76.png"><link rel="apple-touch-icon" type="image/png" sizes="60x60" href="../apple-touch-icon-60x60.png"><script src="../deps/jquery-3.6.0/jquery-3.6.0.min.js"></script><meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no"><link href="../deps/bootstrap-5.2.2/bootstrap.min.css" rel="stylesheet"><script src="../deps/bootstrap-5.2.2/bootstrap.bundle.min.js"></script><!-- Font Awesome icons --><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous"><link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous"><!-- bootstrap-toc --><script src="https://cdn.jsdelivr.net/gh/afeld/bootstrap-toc@v1.0.1/dist/bootstrap-toc.min.js" integrity="sha256-4veVQbu7//Lk5TSmc7YV48MxtMy98e26cf5MrgZYnwo=" crossorigin="anonymous"></script><!-- headroom.js --><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script><!-- clipboard.js 
--><script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script><!-- search --><script src="https://cdnjs.cloudflare.com/ajax/libs/fuse.js/6.4.6/fuse.js" integrity="sha512-zv6Ywkjyktsohkbp9bb45V6tEMoWhzFzXis+LrMehmJZZSys19Yxf1dopHx7WzIKxr5tK2dVcYmaCk2uqdjF4A==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/autocomplete.js/0.38.0/autocomplete.jquery.min.js" integrity="sha512-GU9ayf+66Xx2TmpxqJpliWbT5PiGYxpaG8rfnBEk1LL8l1KGkRShhngwdXK1UgqhAzWpZHSiYPc09/NwDQIGyg==" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mark.js/8.11.1/mark.min.js" integrity="sha512-5CYOlHXGh6QpOFA/TeTylKLWfB3ftPsde7AnmhuitiTX4K5SqCLBeKro6sPS8ilsz1Q4NRx3v8Ko2IBiszzdww==" crossorigin="anonymous"></script><!-- pkgdown --><script src="../pkgdown.js"></script><script src="../extra.js"></script><meta property="og:title" content="Schema class — Schema"><meta property="og:description" content="A Schema is an Arrow object containing Fields, which map names to
Arrow data types. Create a Schema when you
want to convert an R data.frame to Arrow but don't want to rely on the
default mapping of R types to Arrow types, such as when you want to choose a
specific numeric precision, or when creating a Dataset and you want to
ensure a specific schema rather than inferring it from the various files.
Many Arrow objects, including Table and Dataset, have a $schema method
(active binding) that lets you access their schema."><meta property="og:image" content="https://arrow.apache.org/img/arrow-logo_horizontal_black-txt_white-bg.png"><meta property="og:image:alt" content="Apache Arrow logo, displaying the triple chevron image adjacent to the text"><meta name="twitter:card" content="summary_large_image"><meta name="twitter:creator" content="@apachearrow"><meta name="twitter:site" content="@apachearrow"><!-- mathjax --><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script><script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script><!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]--><!-- Matomo --><script>
/* Matomo analytics bootstrap: queue tracker commands on the shared global
   `_paq` array, then load the tracker script asynchronously. */
var _paq = window._paq = window._paq || [];
/* Queue order matters: tracker methods such as "setCustomDimension" have to be
   queued ahead of "trackPageView". */
/* Cookie tracking is explicitly disabled to avoid privacy issues. */
_paq.push(['disableCookies']);
_paq.push(['trackPageView']);
_paq.push(['enableLinkTracking']);
(function () {
  var trackerBase = "https://analytics.apache.org/";
  _paq.push(['setTrackerUrl', trackerBase + 'matomo.php']);
  _paq.push(['setSiteId', '20']);

  /* Build the async <script> element that fetches the tracker itself. */
  var loader = document.createElement('script');
  loader.async = true;
  loader.src = trackerBase + 'matomo.js';

  /* Insert it ahead of the first script already present in the document. */
  var firstScript = document.getElementsByTagName('script')[0];
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script><!-- End Matomo Code --></head><body>
<a href="#main" class="visually-hidden-focusable">Skip to contents</a>
<nav class="navbar fixed-top navbar-dark navbar-expand-lg bg-black" aria-label="Site navigation"><div class="container">
<a class="navbar-brand me-2" href="../index.html">Arrow R Package</a>
<span class="version">
<small class="nav-text text-muted me-auto" data-bs-toggle="tooltip" data-bs-placement="bottom" title="">11.0.0</small>
</span>
<button class="navbar-toggler" type="button" data-bs-toggle="collapse" data-bs-target="#navbar" aria-controls="navbar" aria-expanded="false" aria-label="Toggle navigation">
<span class="navbar-toggler-icon"></span>
</button>
<div id="navbar" class="collapse navbar-collapse ms-3">
<ul class="navbar-nav me-auto"><li class="nav-item">
<a class="nav-link" href="../articles/arrow.html">Get started</a>
</li>
<li class="active nav-item">
<a class="nav-link" href="../reference/index.html">Reference</a>
</li>
<li class="nav-item dropdown">
<a href="#" class="nav-link dropdown-toggle" data-bs-toggle="dropdown" role="button" aria-expanded="false" aria-haspopup="true" id="dropdown-articles">Articles</a>
<div class="dropdown-menu" aria-labelledby="dropdown-articles">
<h6 class="dropdown-header" data-toc-skip>Using the package</h6>
<a class="dropdown-item" href="../articles/read_write.html">Reading and writing data files</a>
<a class="dropdown-item" href="../articles/data_wrangling.html">Data analysis with dplyr syntax</a>
<a class="dropdown-item" href="../articles/dataset.html">Working with multi-file data sets</a>
<a class="dropdown-item" href="../articles/python.html">Integrating Arrow, Python, and R</a>
<a class="dropdown-item" href="../articles/fs.html">Using cloud storage (S3, GCS)</a>
<a class="dropdown-item" href="../articles/flight.html">Connecting to a Flight server</a>
<div class="dropdown-divider"></div>
<h6 class="dropdown-header" data-toc-skip>Arrow concepts</h6>
<a class="dropdown-item" href="../articles/data_objects.html">Data objects</a>
<a class="dropdown-item" href="../articles/data_types.html">Data types</a>
<a class="dropdown-item" href="../articles/metadata.html">Metadata</a>
<div class="dropdown-divider"></div>
<h6 class="dropdown-header" data-toc-skip>Installation</h6>
<a class="dropdown-item" href="../articles/install.html">Installing on Linux</a>
<a class="dropdown-item" href="../articles/install_nightly.html">Installing development versions</a>
<div class="dropdown-divider"></div>
<a class="dropdown-item" href="../articles/index.html">More articles...</a>
</div>
</li>
<li class="nav-item">
<a class="nav-link" href="../news/index.html">Changelog</a>
</li>
</ul><form class="form-inline my-2 my-lg-0" role="search">
<input type="search" class="form-control me-sm-2" aria-label="Search" name="search-input" data-search-index="../search.json" id="search-input" placeholder="Search for" autocomplete="off"></form>
<ul class="navbar-nav"></ul></div>
</div>
</nav><div class="container template-reference-topic">
<div class="row">
<main id="main" class="col-md-9"><div class="page-header">
<h1>Schema class</h1>
<small class="dont-index">Source: <a href="https://github.com/apache/arrow/blob/master/r/R/schema.R" class="external-link"><code>R/schema.R</code></a></small>
<div class="d-none name"><code>Schema.Rd</code></div>
</div>
<div class="ref-description section level2">
<p>A <code>Schema</code> is an Arrow object containing <a href="Field.html">Field</a>s, which map names to
Arrow <a href="data-type.html">data types</a>. Create a <code>Schema</code> when you
want to convert an R <code>data.frame</code> to Arrow but don't want to rely on the
default mapping of R types to Arrow types, such as when you want to choose a
specific numeric precision, or when creating a <a href="Dataset.html">Dataset</a> and you want to
ensure a specific schema rather than inferring it from the various files.</p>
<p>Many Arrow objects, including <a href="Table.html">Table</a> and <a href="Dataset.html">Dataset</a>, have a <code>$schema</code> method
(active binding) that lets you access their schema.</p>
</div>
<div class="section level2">
<h2 id="ref-usage">Usage<a class="anchor" aria-label="anchor" href="#ref-usage"></a></h2>
<div class="sourceCode"><pre class="sourceCode r"><code><span><span class="fu">schema</span><span class="op">(</span><span class="va">...</span><span class="op">)</span></span></code></pre></div>
</div>
<div class="section level2">
<h2 id="arguments">Arguments<a class="anchor" aria-label="anchor" href="#arguments"></a></h2>
<dl><dt>...</dt>
<dd><p><a href="Field.html">fields</a> or field name/<a href="data-type.html">data type</a> pairs</p></dd>
</dl></div>
<div class="section level2">
<h2 id="methods">Methods<a class="anchor" aria-label="anchor" href="#methods"></a></h2>
<ul><li><p><code>$ToString()</code>: convert to a string</p></li>
<li><p><code>$field(i)</code>: returns the field at index <code>i</code> (0-based)</p></li>
<li><p><code>$GetFieldByName(x)</code>: returns the field with name <code>x</code></p></li>
<li><p><code>$WithMetadata(metadata)</code>: returns a new <code>Schema</code> with the key-value
<code>metadata</code> set. Note that all list elements in <code>metadata</code> will be coerced
to <code>character</code>.</p></li>
</ul></div>
<div class="section level2">
<h2 id="active-bindings">Active bindings<a class="anchor" aria-label="anchor" href="#active-bindings"></a></h2>
<ul><li><p><code>$names</code>: returns the field names (called in <code>names(Schema)</code>)</p></li>
<li><p><code>$num_fields</code>: returns the number of fields (called in <code>length(Schema)</code>)</p></li>
<li><p><code>$fields</code>: returns the list of <code>Field</code>s in the <code>Schema</code>, suitable for
iterating over</p></li>
<li><p><code>$HasMetadata</code>: logical: does this <code>Schema</code> have extra metadata?</p></li>
<li><p><code>$metadata</code>: returns the key-value metadata as a named list.
Modify or replace by assigning in (<code>sch$metadata &lt;- new_metadata</code>).
All list elements are coerced to string.</p></li>
</ul></div>
<div class="section level2">
<h2 id="r-metadata">R Metadata<a class="anchor" aria-label="anchor" href="#r-metadata"></a></h2>
<p>When converting a data.frame to an Arrow Table or RecordBatch, attributes
from the <code>data.frame</code> are saved alongside tables so that the object can be
reconstructed faithfully in R (e.g. with <code><a href="https://rdrr.io/r/base/as.data.frame.html" class="external-link">as.data.frame()</a></code>). This metadata
can be at the top level of the <code>data.frame</code> (e.g. <code>attributes(df)</code>),
at the column level (e.g. <code>attributes(df$col_a)</code>), or, for list columns only,
at the element level (e.g. <code>attributes(df[1, "col_a"])</code>). For example, this allows
for storing <code>haven</code> columns in a table and being able to faithfully
re-create them when pulled back into R. This metadata is separate from the
schema (column names and types) which is compatible with other Arrow
clients. The R metadata is only read by R and is ignored by other clients
(e.g. Pandas has its own custom metadata). This metadata is stored in
<code>$metadata$r</code>.</p>
<p>Since Schema metadata keys and values must be strings, this metadata is
saved by serializing R's attribute list structure to a string. If the
serialized metadata exceeds 100 KB in size, it is compressed by default
starting in version 3.0.0. To disable this compression (e.g. for tables
that must remain compatible with Arrow versions before 3.0.0 and include large
amounts of metadata), set the option <code>arrow.compress_metadata</code> to <code>FALSE</code>.
Files with compressed metadata are readable by older versions of arrow, but
the metadata is dropped.</p>
</div>
<div class="section level2">
<h2 id="ref-examples">Examples<a class="anchor" aria-label="anchor" href="#ref-examples"></a></h2>
<div class="sourceCode"><pre class="sourceCode r"><code><span class="r-in"><span><span class="fu">schema</span><span class="op">(</span>a <span class="op">=</span> <span class="fu"><a href="data-type.html">int32</a></span><span class="op">(</span><span class="op">)</span>, b <span class="op">=</span> <span class="fu"><a href="data-type.html">float64</a></span><span class="op">(</span><span class="op">)</span><span class="op">)</span></span></span>
<span class="r-out co"><span class="r-pr">#&gt;</span> Schema</span>
<span class="r-out co"><span class="r-pr">#&gt;</span> a: int32</span>
<span class="r-out co"><span class="r-pr">#&gt;</span> b: double</span>
<span class="r-in"><span></span></span>
<span class="r-in"><span><span class="fu">schema</span><span class="op">(</span></span></span>
<span class="r-in"><span> <span class="fu"><a href="Field.html">field</a></span><span class="op">(</span><span class="st">"b"</span>, <span class="fu"><a href="https://rdrr.io/r/base/double.html" class="external-link">double</a></span><span class="op">(</span><span class="op">)</span><span class="op">)</span>,</span></span>
<span class="r-in"><span> <span class="fu"><a href="Field.html">field</a></span><span class="op">(</span><span class="st">"c"</span>, <span class="fu"><a href="data-type.html">bool</a></span><span class="op">(</span><span class="op">)</span>, nullable <span class="op">=</span> <span class="cn">FALSE</span><span class="op">)</span>,</span></span>
<span class="r-in"><span> <span class="fu"><a href="Field.html">field</a></span><span class="op">(</span><span class="st">"d"</span>, <span class="fu"><a href="data-type.html">string</a></span><span class="op">(</span><span class="op">)</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="op">)</span></span></span>
<span class="r-out co"><span class="r-pr">#&gt;</span> Schema</span>
<span class="r-out co"><span class="r-pr">#&gt;</span> b: double</span>
<span class="r-out co"><span class="r-pr">#&gt;</span> c: bool not null</span>
<span class="r-out co"><span class="r-pr">#&gt;</span> d: string</span>
<span class="r-in"><span></span></span>
<span class="r-in"><span><span class="va">df</span> <span class="op">&lt;-</span> <span class="fu"><a href="https://rdrr.io/r/base/data.frame.html" class="external-link">data.frame</a></span><span class="op">(</span>col1 <span class="op">=</span> <span class="fl">2</span><span class="op">:</span><span class="fl">4</span>, col2 <span class="op">=</span> <span class="fu"><a href="https://rdrr.io/r/base/c.html" class="external-link">c</a></span><span class="op">(</span><span class="fl">0.1</span>, <span class="fl">0.3</span>, <span class="fl">0.5</span><span class="op">)</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="va">tab1</span> <span class="op">&lt;-</span> <span class="fu"><a href="Table.html">arrow_table</a></span><span class="op">(</span><span class="va">df</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="va">tab1</span><span class="op">$</span><span class="va">schema</span></span></span>
<span class="r-out co"><span class="r-pr">#&gt;</span> Schema</span>
<span class="r-out co"><span class="r-pr">#&gt;</span> col1: int32</span>
<span class="r-out co"><span class="r-pr">#&gt;</span> col2: double</span>
<span class="r-out co"><span class="r-pr">#&gt;</span> </span>
<span class="r-out co"><span class="r-pr">#&gt;</span> See $metadata for additional Schema metadata</span>
<span class="r-in"><span><span class="va">tab2</span> <span class="op">&lt;-</span> <span class="fu"><a href="Table.html">arrow_table</a></span><span class="op">(</span><span class="va">df</span>, schema <span class="op">=</span> <span class="fu">schema</span><span class="op">(</span>col1 <span class="op">=</span> <span class="fu"><a href="data-type.html">int8</a></span><span class="op">(</span><span class="op">)</span>, col2 <span class="op">=</span> <span class="fu"><a href="data-type.html">float32</a></span><span class="op">(</span><span class="op">)</span><span class="op">)</span><span class="op">)</span></span></span>
<span class="r-in"><span><span class="va">tab2</span><span class="op">$</span><span class="va">schema</span></span></span>
<span class="r-out co"><span class="r-pr">#&gt;</span> Schema</span>
<span class="r-out co"><span class="r-pr">#&gt;</span> col1: int8</span>
<span class="r-out co"><span class="r-pr">#&gt;</span> col2: float</span>
<span class="r-out co"><span class="r-pr">#&gt;</span> </span>
<span class="r-out co"><span class="r-pr">#&gt;</span> See $metadata for additional Schema metadata</span>
</code></pre></div>
</div>
</main><aside class="col-md-3"><nav id="toc" aria-label="On this page"><h2>On this page</h2>
</nav></aside></div>
<footer><div class="pkgdown-footer-left">
<p></p><p><a href="https://arrow.apache.org/docs/r/versions.html">Older versions of these docs</a></p>
</div>
<div class="pkgdown-footer-right">
<p></p><p>Site built with <a href="https://pkgdown.r-lib.org/" class="external-link">pkgdown</a> 2.0.7.</p>
</div>
</footer></div>
</body></html>