<!-- blob: dfd2ec9b8eda9bea150ca3e379dca5a245a1aaad [file] [log] [blame] -->
<!-- Generated by pkgdown: do not edit by hand -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Write Parquet file to disk — write_parquet • Arrow R Package</title>
<!-- jquery -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script>
<!-- Bootstrap -->
<link href="https://cdnjs.cloudflare.com/ajax/libs/bootswatch/3.4.0/cosmo/bootstrap.min.css" rel="stylesheet" crossorigin="anonymous" />
<script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script>
<!-- bootstrap-toc -->
<link rel="stylesheet" href="../bootstrap-toc.css">
<script src="../bootstrap-toc.js"></script>
<!-- Font Awesome icons -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous" />
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous" />
<!-- clipboard.js -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script>
<!-- headroom.js -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script>
<!-- pkgdown -->
<link href="../pkgdown.css" rel="stylesheet">
<script src="../pkgdown.js"></script>
<script src="../extra.js"></script>
<meta property="og:title" content="Write Parquet file to disk — write_parquet" />
<meta property="og:description" content="Parquet is a columnar storage file format.
This function enables you to write Parquet files from R." />
<!-- mathjax -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script>
<!--[if lt IE 9]>
<script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script>
<script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script>
<![endif]-->
<!-- Matomo -->
<script>
/* Matomo command queue: reuse window._paq when it already exists, otherwise start with an empty array. */
var _paq = window._paq = window._paq || [];
/* tracker methods like "setCustomDimension" should be called before "trackPageView" */
/* The two privacy-related commands are queued ahead of the page-view command. */
_paq.push(["setDoNotTrack", true]);
_paq.push(["disableCookies"]);
_paq.push(["trackPageView"]);
_paq.push(["enableLinkTracking"]);
(function() {
  /* Base URL shared by the tracking endpoint and the tracker script. */
  var baseUrl = "https://analytics.apache.org/";
  _paq.push(["setTrackerUrl", baseUrl + "matomo.php"]);
  _paq.push(["setSiteId", "20"]);
  /* Build an async script element for matomo.js and insert it before the first script element in the document. */
  var doc = document;
  var loader = doc.createElement("script");
  var firstScript = doc.getElementsByTagName("script")[0];
  loader.async = true;
  loader.src = baseUrl + "matomo.js";
  firstScript.parentNode.insertBefore(loader, firstScript);
})();
</script>
<!-- End Matomo Code -->
</head>
<body data-spy="scroll" data-target="#toc">
<div class="container template-reference-topic">
<header>
<div class="navbar navbar-default navbar-fixed-top" role="navigation">
<div class="container">
<div class="navbar-header">
<button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false">
<span class="sr-only">Toggle navigation</span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
<span class="icon-bar"></span>
</button>
<span class="navbar-brand">
<a class="navbar-link" href="../index.html">Arrow R Package</a>
<span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">4.0.1</span>
</span>
</div>
<div id="navbar" class="navbar-collapse collapse">
<ul class="nav navbar-nav">
<li>
<a href="https://arrow.apache.org/">❯❯❯</a>
</li>
<li>
<a href="../articles/arrow.html">Get started</a>
</li>
<li>
<a href="../reference/index.html">Reference</a>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">
Articles
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="../articles/install.html">Installing the Arrow Package on Linux</a>
</li>
<li>
<a href="../articles/dataset.html">Working with Arrow Datasets and dplyr</a>
</li>
<li>
<a href="../articles/fs.html">Working with Cloud Storage (S3)</a>
</li>
<li>
<a href="../articles/python.html">Apache Arrow in Python and R with reticulate</a>
</li>
<li>
<a href="../articles/flight.html">Connecting to Flight RPC Servers</a>
</li>
<li>
<a href="../articles/developing.html">Arrow R Developer Guide</a>
</li>
</ul>
</li>
<li>
<a href="../news/index.html">Changelog</a>
</li>
<li class="dropdown">
<a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false">
Project docs
<span class="caret"></span>
</a>
<ul class="dropdown-menu" role="menu">
<li>
<a href="https://arrow.apache.org/docs/format/README.html">Specification</a>
</li>
<li>
<a href="https://arrow.apache.org/docs/c_glib">C GLib</a>
</li>
<li>
<a href="https://arrow.apache.org/docs/cpp">C++</a>
</li>
<li>
<a href="https://arrow.apache.org/docs/java">Java</a>
</li>
<li>
<a href="https://arrow.apache.org/docs/js">JavaScript</a>
</li>
<li>
<a href="https://arrow.apache.org/docs/python">Python</a>
</li>
<li>
<a href="../index.html">R</a>
</li>
</ul>
</li>
</ul>
<ul class="nav navbar-nav navbar-right">
</ul>
</div><!--/.nav-collapse -->
</div><!--/.container -->
</div><!--/.navbar -->
</header>
<div class="row">
<div class="col-md-9 contents">
<div class="page-header">
<h1>Write Parquet file to disk</h1>
<small class="dont-index">Source: <a href='https://github.com/apache/arrow/blob/master/r/R/parquet.R'><code>R/parquet.R</code></a></small>
<div class="hidden name"><code>write_parquet.Rd</code></div>
</div>
<div class="ref-description">
<p><a href='https://parquet.apache.org/'>Parquet</a> is a columnar storage file format.
This function enables you to write Parquet files from R.</p>
</div>
<pre class="usage"><span class='fu'>write_parquet</span><span class='op'>(</span>
<span class='va'>x</span>,
<span class='va'>sink</span>,
chunk_size <span class='op'>=</span> <span class='cn'>NULL</span>,
version <span class='op'>=</span> <span class='cn'>NULL</span>,
compression <span class='op'>=</span> <span class='fu'>default_parquet_compression</span><span class='op'>(</span><span class='op'>)</span>,
compression_level <span class='op'>=</span> <span class='cn'>NULL</span>,
use_dictionary <span class='op'>=</span> <span class='cn'>NULL</span>,
write_statistics <span class='op'>=</span> <span class='cn'>NULL</span>,
data_page_size <span class='op'>=</span> <span class='cn'>NULL</span>,
use_deprecated_int96_timestamps <span class='op'>=</span> <span class='cn'>FALSE</span>,
coerce_timestamps <span class='op'>=</span> <span class='cn'>NULL</span>,
allow_truncated_timestamps <span class='op'>=</span> <span class='cn'>FALSE</span>,
properties <span class='op'>=</span> <span class='cn'>NULL</span>,
arrow_properties <span class='op'>=</span> <span class='cn'>NULL</span>
<span class='op'>)</span></pre>
<h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2>
<table class="ref-arguments">
<colgroup><col class="name" /><col class="desc" /></colgroup>
<tr>
<th>x</th>
<td><p><code>data.frame</code>, <a href='RecordBatch.html'>RecordBatch</a>, or <a href='Table.html'>Table</a></p></td>
</tr>
<tr>
<th>sink</th>
<td><p>A string file path, URI, or <a href='OutputStream.html'>OutputStream</a>, or path in a file
system (<code>SubTreeFileSystem</code>)</p></td>
</tr>
<tr>
<th>chunk_size</th>
<td><p>chunk size in number of rows. If NULL, the total number of rows is used.</p></td>
</tr>
<tr>
<th>version</th>
<td><p>parquet version, "1.0" or "2.0". Default "1.0". Numeric values
are coerced to character.</p></td>
</tr>
<tr>
<th>compression</th>
<td><p>compression algorithm. Default "snappy". See details.</p></td>
</tr>
<tr>
<th>compression_level</th>
<td><p>compression level. Meaning depends on compression algorithm</p></td>
</tr>
<tr>
<th>use_dictionary</th>
<td><p>Specify if we should use dictionary encoding. Default <code>TRUE</code></p></td>
</tr>
<tr>
<th>write_statistics</th>
<td><p>Specify if we should write statistics. Default <code>TRUE</code></p></td>
</tr>
<tr>
<th>data_page_size</th>
<td><p>Set a target threshold for the approximate encoded
size of data pages within a column chunk (in bytes). Default 1 MiB.</p></td>
</tr>
<tr>
<th>use_deprecated_int96_timestamps</th>
<td><p>Write timestamps to INT96 Parquet format. Default <code>FALSE</code>.</p></td>
</tr>
<tr>
<th>coerce_timestamps</th>
<td><p>Cast timestamps to a particular resolution. Can be
<code>NULL</code>, "ms" or "us". Default <code>NULL</code> (no casting)</p></td>
</tr>
<tr>
<th>allow_truncated_timestamps</th>
<td><p>Allow loss of data when coercing timestamps to a
particular resolution. E.g. if microsecond or nanosecond data is lost when coercing
to "ms", do not raise an exception</p></td>
</tr>
<tr>
<th>properties</th>
<td><p>A <code>ParquetWriterProperties</code> object, used instead of the options
enumerated in this function's signature. Providing <code>properties</code> as an argument
is deprecated; if you need to assemble <code>ParquetWriterProperties</code> outside
of <code>write_parquet()</code>, use <code>ParquetFileWriter</code> instead.</p></td>
</tr>
<tr>
<th>arrow_properties</th>
<td><p>A <code>ParquetArrowWriterProperties</code> object. Like
<code>properties</code>, this argument is deprecated.</p></td>
</tr>
</table>
<h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2>
<p>the input <code>x</code> invisibly.</p>
<h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2>
<p>Due to features of the format, Parquet files cannot be appended to.
If you want to use the Parquet format but also want the ability to extend
your dataset, you can write to additional Parquet files and then treat
the whole directory of files as a <a href='Dataset.html'>Dataset</a> you can query.
See <code><a href='../articles/dataset.html'>vignette("dataset", package = "arrow")</a></code> for examples of this.</p>
<p>The parameters <code>compression</code>, <code>compression_level</code>, <code>use_dictionary</code> and
<code>write_statistics</code> support various patterns:</p><ul>
<li><p>The default <code>NULL</code> leaves the parameter unspecified, and the C++ library
uses an appropriate default for each column (defaults listed above)</p></li>
<li><p>A single, unnamed, value (e.g. a single string for <code>compression</code>) applies to all columns</p></li>
<li><p>An unnamed vector, of the same size as the number of columns, to specify a
value for each column, in positional order</p></li>
<li><p>A named vector, to specify the value for the named columns; the default
value for the setting is used for any column that is not named</p></li>
</ul>
<p>The <code>compression</code> argument can be any of the following (case insensitive):
"uncompressed", "snappy", "gzip", "brotli", "zstd", "lz4", "lzo" or "bz2".
Only "uncompressed" is guaranteed to be available, but "snappy" and "gzip"
are almost always included. See <code><a href='codec_is_available.html'>codec_is_available()</a></code>.
The default "snappy" is used if available, otherwise "uncompressed". To
disable compression, set <code>compression = "uncompressed"</code>.
Note that "uncompressed" columns may still have dictionary encoding.</p>
<h2 class="hasAnchor" id="examples"><a class="anchor" href="#examples"></a>Examples</h2>
<pre class="examples"><div class='input'><span class='kw'>if</span> <span class='op'>(</span><span class='cn'>FALSE</span><span class='op'>)</span> <span class='op'>{</span>
<span class='va'>tf1</span> <span class='op'>&lt;-</span> <span class='fu'><a href='https://rdrr.io/r/base/tempfile.html'>tempfile</a></span><span class='op'>(</span>fileext <span class='op'>=</span> <span class='st'>".parquet"</span><span class='op'>)</span>
<span class='fu'>write_parquet</span><span class='op'>(</span><span class='fu'><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></span><span class='op'>(</span>x <span class='op'>=</span> <span class='fl'>1</span><span class='op'>:</span><span class='fl'>5</span><span class='op'>)</span>, <span class='va'>tf1</span><span class='op'>)</span>
<span class='co'># using compression</span>
<span class='kw'>if</span> <span class='op'>(</span><span class='fu'><a href='codec_is_available.html'>codec_is_available</a></span><span class='op'>(</span><span class='st'>"gzip"</span><span class='op'>)</span><span class='op'>)</span> <span class='op'>{</span>
<span class='va'>tf2</span> <span class='op'>&lt;-</span> <span class='fu'><a href='https://rdrr.io/r/base/tempfile.html'>tempfile</a></span><span class='op'>(</span>fileext <span class='op'>=</span> <span class='st'>".gz.parquet"</span><span class='op'>)</span>
<span class='fu'>write_parquet</span><span class='op'>(</span><span class='fu'><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></span><span class='op'>(</span>x <span class='op'>=</span> <span class='fl'>1</span><span class='op'>:</span><span class='fl'>5</span><span class='op'>)</span>, <span class='va'>tf2</span>, compression <span class='op'>=</span> <span class='st'>"gzip"</span>, compression_level <span class='op'>=</span> <span class='fl'>5</span><span class='op'>)</span>
<span class='op'>}</span>
<span class='op'>}</span>
</div></pre>
</div>
<div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar">
<nav id="toc" data-toggle="toc" class="sticky-top">
<h2 data-toc-skip>Contents</h2>
</nav>
</div>
</div>
<footer>
<div class="copyright">
<p>Developed by Neal Richardson, Ian Cook, Jonathan Keane, Romain François, Jeroen Ooms, Apache Arrow.</p>
</div>
<div class="pkgdown">
<p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.6.1.</p>
</div>
</footer>
</div>
<script type="text/javascript" src="/docs/_static/versionwarning.js"></script> </body>
</html>