| <!-- Generated by pkgdown: do not edit by hand --> |
| <!DOCTYPE html> |
| <html lang="en"> |
| <head> |
| <meta charset="utf-8"> |
| <meta http-equiv="X-UA-Compatible" content="IE=edge"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| |
| <title>Write Parquet file to disk — write_parquet • Arrow R Package</title> |
| |
| |
| <!-- jquery --> |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.4.1/jquery.min.js" integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo=" crossorigin="anonymous"></script> |
| <!-- Bootstrap --> |
| <link href="https://cdnjs.cloudflare.com/ajax/libs/bootswatch/3.4.0/cosmo/bootstrap.min.css" rel="stylesheet" crossorigin="anonymous" /> |
| |
| |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/3.4.1/js/bootstrap.min.js" integrity="sha256-nuL8/2cJ5NDSSwnKD8VqreErSWHtnEP9E7AySL+1ev4=" crossorigin="anonymous"></script> |
| |
| <!-- bootstrap-toc --> |
| <link rel="stylesheet" href="../bootstrap-toc.css"> |
| <script src="../bootstrap-toc.js"></script> |
| |
| <!-- Font Awesome icons --> |
| <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/all.min.css" integrity="sha256-mmgLkCYLUQbXn0B1SRqzHar6dCnv9oZFPEC1g1cwlkk=" crossorigin="anonymous" /> |
| <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.12.1/css/v4-shims.min.css" integrity="sha256-wZjR52fzng1pJHwx4aV2AO3yyTOXrcDW7jBpJtTwVxw=" crossorigin="anonymous" /> |
| |
| <!-- clipboard.js --> |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.6/clipboard.min.js" integrity="sha256-inc5kl9MA1hkeYUt+EC3BhlIgyp/2jDIyBLS6k3UxPI=" crossorigin="anonymous"></script> |
| |
| <!-- headroom.js --> |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/headroom.min.js" integrity="sha256-AsUX4SJE1+yuDu5+mAVzJbuYNPHj/WroHuZ8Ir/CkE0=" crossorigin="anonymous"></script> |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/headroom/0.11.0/jQuery.headroom.min.js" integrity="sha256-ZX/yNShbjqsohH1k95liqY9Gd8uOiE1S4vZc+9KQ1K4=" crossorigin="anonymous"></script> |
| |
| <!-- pkgdown --> |
| <link href="../pkgdown.css" rel="stylesheet"> |
| <script src="../pkgdown.js"></script> |
| |
| |
| |
| |
| <script src="../extra.js"></script> |
| |
| <meta property="og:title" content="Write Parquet file to disk — write_parquet" /> |
| <meta property="og:description" content="Parquet is a columnar storage file format. |
| This function enables you to write Parquet files from R." /> |
| |
| |
| |
| |
| <!-- mathjax --> |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/MathJax.js" integrity="sha256-nvJJv9wWKEm88qvoQl9ekL2J+k/RWIsaSScxxlsrv8k=" crossorigin="anonymous"></script> |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/config/TeX-AMS-MML_HTMLorMML.js" integrity="sha256-84DKXVJXs0/F8OTMzX4UR909+jtl4G7SPypPavF+GfA=" crossorigin="anonymous"></script> |
| |
| <!--[if lt IE 9]> |
| <script src="https://oss.maxcdn.com/html5shiv/3.7.3/html5shiv.min.js"></script> |
| <script src="https://oss.maxcdn.com/respond/1.4.2/respond.min.js"></script> |
| <![endif]--> |
| |
| <!-- Matomo -->
|
| <script>
| /* Matomo analytics bootstrap: commands are queued on window._paq (an existing queue is reused if one is already defined); presumably the queue is consumed by matomo.js once it loads - confirm against the Matomo docs. */
| var _paq = window._paq = window._paq || [];
|
| /* tracker methods like "setCustomDimension" should be called before "trackPageView" */
| /* Queue "setDoNotTrack" with true - presumably makes the tracker honor the browser's Do Not Track preference; TODO confirm. */
| _paq.push(["setDoNotTrack", true]);
| /* Queue "disableCookies" - the name suggests cookieless tracking; TODO confirm. */
| _paq.push(["disableCookies"]);
| /* Queue the page-view command; the two configuration commands above are pushed before it on purpose (see the ordering note above). */
| _paq.push(['trackPageView']);
|
| _paq.push(['enableLinkTracking']);
| /* The IIFE keeps u, d, g and s out of the global scope while it sets the endpoint and injects the tracker script. */
| (function() {
| /* Base URL of the analytics host; both the tracking endpoint (matomo.php) and the tracker script (matomo.js) are built from it. */
| var u="https://analytics.apache.org/";
|
| _paq.push(['setTrackerUrl', u+'matomo.php']);
| /* Site ID '20' - presumably identifies this site on the analytics host; verify there before changing. */
| _paq.push(['setSiteId', '20']);
| /* d = document, g = newly created script element, s = first script element already present in the document. */
| var d=document, g=d.createElement('script'), s=d.getElementsByTagName('script')[0];
| /* Mark g as async, point it at matomo.js, and insert it immediately before s so the browser fetches it. */
| g.async=true; g.src=u+'matomo.js'; s.parentNode.insertBefore(g,s);
|
| })();
|
| </script>
|
| <!-- End Matomo Code -->
|
|
|
| </head>
|
| |
| <body data-spy="scroll" data-target="#toc"> |
| <div class="container template-reference-topic"> |
| <header> |
| <div class="navbar navbar-default navbar-fixed-top" role="navigation"> |
| <div class="container"> |
| <div class="navbar-header"> |
| <button type="button" class="navbar-toggle collapsed" data-toggle="collapse" data-target="#navbar" aria-expanded="false"> |
| <span class="sr-only">Toggle navigation</span> |
| <span class="icon-bar"></span> |
| <span class="icon-bar"></span> |
| <span class="icon-bar"></span> |
| </button> |
| <span class="navbar-brand"> |
| <a class="navbar-link" href="../index.html">Arrow R Package</a> |
| <span class="version label label-default" data-toggle="tooltip" data-placement="bottom" title="Released version">4.0.1</span> |
| </span> |
| </div> |
| |
| <div id="navbar" class="navbar-collapse collapse"> |
| <ul class="nav navbar-nav"> |
| <li> |
| <a href="https://arrow.apache.org/">❯❯❯</a> |
| </li> |
| <li> |
| <a href="../articles/arrow.html">Get started</a> |
| </li> |
| <li> |
| <a href="../reference/index.html">Reference</a> |
| </li> |
| <li class="dropdown"> |
| <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false"> |
| Articles |
| |
| <span class="caret"></span> |
| </a> |
| <ul class="dropdown-menu" role="menu"> |
| <li> |
| <a href="../articles/install.html">Installing the Arrow Package on Linux</a> |
| </li> |
| <li> |
| <a href="../articles/dataset.html">Working with Arrow Datasets and dplyr</a> |
| </li> |
| <li> |
| <a href="../articles/fs.html">Working with Cloud Storage (S3)</a> |
| </li> |
| <li> |
| <a href="../articles/python.html">Apache Arrow in Python and R with reticulate</a> |
| </li> |
| <li> |
| <a href="../articles/flight.html">Connecting to Flight RPC Servers</a> |
| </li> |
| <li> |
| <a href="../articles/developing.html">Arrow R Developer Guide</a> |
| </li> |
| </ul> |
| </li> |
| <li> |
| <a href="../news/index.html">Changelog</a> |
| </li> |
| <li class="dropdown"> |
| <a href="#" class="dropdown-toggle" data-toggle="dropdown" role="button" aria-expanded="false"> |
| Project docs |
| |
| <span class="caret"></span> |
| </a> |
| <ul class="dropdown-menu" role="menu"> |
| <li> |
| <a href="https://arrow.apache.org/docs/format/README.html">Specification</a> |
| </li> |
| <li> |
| <a href="https://arrow.apache.org/docs/c_glib">C GLib</a> |
| </li> |
| <li> |
| <a href="https://arrow.apache.org/docs/cpp">C++</a> |
| </li> |
| <li> |
| <a href="https://arrow.apache.org/docs/java">Java</a> |
| </li> |
| <li> |
| <a href="https://arrow.apache.org/docs/js">JavaScript</a> |
| </li> |
| <li> |
| <a href="https://arrow.apache.org/docs/python">Python</a> |
| </li> |
| <li> |
| <a href="../index.html">R</a> |
| </li> |
| </ul> |
| </li> |
| </ul> |
| <ul class="nav navbar-nav navbar-right"> |
| |
| </ul> |
| |
| </div><!--/.nav-collapse --> |
| </div><!--/.container --> |
| </div><!--/.navbar --> |
| |
| |
| |
| </header> |
| |
| <div class="row"> |
| <div class="col-md-9 contents"> |
| <div class="page-header"> |
| <h1>Write Parquet file to disk</h1> |
| <small class="dont-index">Source: <a href='https://github.com/apache/arrow/blob/master/r/R/parquet.R'><code>R/parquet.R</code></a></small> |
| <div class="hidden name"><code>write_parquet.Rd</code></div> |
| </div> |
| |
| <div class="ref-description"> |
| <p><a href='https://parquet.apache.org/'>Parquet</a> is a columnar storage file format. |
| This function enables you to write Parquet files from R.</p> |
| </div> |
| |
| <pre class="usage"><span class='fu'>write_parquet</span><span class='op'>(</span> |
| <span class='va'>x</span>, |
| <span class='va'>sink</span>, |
| chunk_size <span class='op'>=</span> <span class='cn'>NULL</span>, |
| version <span class='op'>=</span> <span class='cn'>NULL</span>, |
| compression <span class='op'>=</span> <span class='fu'>default_parquet_compression</span><span class='op'>(</span><span class='op'>)</span>, |
| compression_level <span class='op'>=</span> <span class='cn'>NULL</span>, |
| use_dictionary <span class='op'>=</span> <span class='cn'>NULL</span>, |
| write_statistics <span class='op'>=</span> <span class='cn'>NULL</span>, |
| data_page_size <span class='op'>=</span> <span class='cn'>NULL</span>, |
| use_deprecated_int96_timestamps <span class='op'>=</span> <span class='cn'>FALSE</span>, |
| coerce_timestamps <span class='op'>=</span> <span class='cn'>NULL</span>, |
| allow_truncated_timestamps <span class='op'>=</span> <span class='cn'>FALSE</span>, |
| properties <span class='op'>=</span> <span class='cn'>NULL</span>, |
| arrow_properties <span class='op'>=</span> <span class='cn'>NULL</span> |
| <span class='op'>)</span></pre> |
| |
| <h2 class="hasAnchor" id="arguments"><a class="anchor" href="#arguments"></a>Arguments</h2> |
| <table class="ref-arguments"> |
| <colgroup><col class="name" /><col class="desc" /></colgroup> |
| <tr> |
| <th>x</th> |
| <td><p><code>data.frame</code>, <a href='RecordBatch.html'>RecordBatch</a>, or <a href='Table.html'>Table</a></p></td> |
| </tr> |
| <tr> |
| <th>sink</th> |
| <td><p>A string file path, URI, or <a href='OutputStream.html'>OutputStream</a>, or path in a file |
| system (<code>SubTreeFileSystem</code>)</p></td> |
| </tr> |
| <tr> |
| <th>chunk_size</th> |
| <td><p>chunk size in number of rows. If NULL, the total number of rows is used.</p></td> |
| </tr> |
| <tr> |
| <th>version</th> |
| <td><p>parquet version, "1.0" or "2.0". Default "1.0". Numeric values |
| are coerced to character.</p></td> |
| </tr> |
| <tr> |
| <th>compression</th> |
| <td><p>compression algorithm. Default "snappy". See details.</p></td> |
| </tr> |
| <tr> |
| <th>compression_level</th> |
| <td><p>compression level. Meaning depends on compression algorithm</p></td> |
| </tr> |
| <tr> |
| <th>use_dictionary</th> |
| <td><p>Specify if we should use dictionary encoding. Default <code>TRUE</code></p></td> |
| </tr> |
| <tr> |
| <th>write_statistics</th> |
| <td><p>Specify if we should write statistics. Default <code>TRUE</code></p></td> |
| </tr> |
| <tr> |
| <th>data_page_size</th> |
| <td><p>Set a target threshold for the approximate encoded |
| size of data pages within a column chunk (in bytes). Default 1 MiB.</p></td> |
| </tr> |
| <tr> |
| <th>use_deprecated_int96_timestamps</th> |
| <td><p>Write timestamps to INT96 Parquet format. Default <code>FALSE</code>.</p></td> |
| </tr> |
| <tr> |
| <th>coerce_timestamps</th> |
| <td><p>Cast timestamps to a particular resolution. Can be |
| <code>NULL</code>, "ms" or "us". Default <code>NULL</code> (no casting)</p></td> |
| </tr> |
| <tr> |
| <th>allow_truncated_timestamps</th> |
| <td><p>Allow loss of data when coercing timestamps to a |
| particular resolution. E.g. if microsecond or nanosecond data is lost when coercing |
| to "ms", do not raise an exception</p></td> |
| </tr> |
| <tr> |
| <th>properties</th> |
| <td><p>A <code>ParquetWriterProperties</code> object, used instead of the options |
| enumerated in this function's signature. Providing <code>properties</code> as an argument |
| is deprecated; if you need to assemble <code>ParquetWriterProperties</code> outside |
| of <code>write_parquet()</code>, use <code>ParquetFileWriter</code> instead.</p></td> |
| </tr> |
| <tr> |
| <th>arrow_properties</th> |
| <td><p>A <code>ParquetArrowWriterProperties</code> object. Like |
| <code>properties</code>, this argument is deprecated.</p></td> |
| </tr> |
| </table> |
| |
| <h2 class="hasAnchor" id="value"><a class="anchor" href="#value"></a>Value</h2> |
| |
| <p>the input <code>x</code> invisibly.</p> |
| <h2 class="hasAnchor" id="details"><a class="anchor" href="#details"></a>Details</h2> |
| |
| <p>Due to features of the format, Parquet files cannot be appended to. |
| If you want to use the Parquet format but also want the ability to extend |
| your dataset, you can write to additional Parquet files and then treat |
| the whole directory of files as a <a href='Dataset.html'>Dataset</a> you can query. |
| See <code><a href='../articles/dataset.html'>vignette("dataset", package = "arrow")</a></code> for examples of this.</p> |
| <p>The parameters <code>compression</code>, <code>compression_level</code>, <code>use_dictionary</code> and |
| <code>write_statistics</code> support various patterns:</p><ul> |
| <li><p>The default <code>NULL</code> leaves the parameter unspecified, and the C++ library |
| uses an appropriate default for each column (defaults listed above)</p></li> |
| <li><p>A single, unnamed, value (e.g. a single string for <code>compression</code>) applies to all columns</p></li> |
| <li><p>An unnamed vector, of the same size as the number of columns, to specify a |
| value for each column, in positional order</p></li> |
| <li><p>A named vector, to specify the value for the named columns; the default |
| value for the setting is used for columns that are not named</p></li> |
| </ul> |
| |
| <p>The <code>compression</code> argument can be any of the following (case insensitive): |
| "uncompressed", "snappy", "gzip", "brotli", "zstd", "lz4", "lzo" or "bz2". |
| Only "uncompressed" is guaranteed to be available, but "snappy" and "gzip" |
| are almost always included. See <code><a href='codec_is_available.html'>codec_is_available()</a></code>. |
| The default "snappy" is used if available, otherwise "uncompressed". To |
| disable compression, set <code>compression = "uncompressed"</code>. |
| Note that "uncompressed" columns may still have dictionary encoding.</p> |
| |
| <h2 class="hasAnchor" id="examples"><a class="anchor" href="#examples"></a>Examples</h2> |
| <pre class="examples"><div class='input'><span class='kw'>if</span> <span class='op'>(</span><span class='cn'>FALSE</span><span class='op'>)</span> <span class='op'>{</span> |
| <span class='va'>tf1</span> <span class='op'><-</span> <span class='fu'><a href='https://rdrr.io/r/base/tempfile.html'>tempfile</a></span><span class='op'>(</span>fileext <span class='op'>=</span> <span class='st'>".parquet"</span><span class='op'>)</span> |
| <span class='fu'>write_parquet</span><span class='op'>(</span><span class='fu'><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></span><span class='op'>(</span>x <span class='op'>=</span> <span class='fl'>1</span><span class='op'>:</span><span class='fl'>5</span><span class='op'>)</span>, <span class='va'>tf1</span><span class='op'>)</span> |
| |
| <span class='co'># using compression</span> |
| <span class='kw'>if</span> <span class='op'>(</span><span class='fu'><a href='codec_is_available.html'>codec_is_available</a></span><span class='op'>(</span><span class='st'>"gzip"</span><span class='op'>)</span><span class='op'>)</span> <span class='op'>{</span> |
| <span class='va'>tf2</span> <span class='op'><-</span> <span class='fu'><a href='https://rdrr.io/r/base/tempfile.html'>tempfile</a></span><span class='op'>(</span>fileext <span class='op'>=</span> <span class='st'>".gz.parquet"</span><span class='op'>)</span> |
| <span class='fu'>write_parquet</span><span class='op'>(</span><span class='fu'><a href='https://rdrr.io/r/base/data.frame.html'>data.frame</a></span><span class='op'>(</span>x <span class='op'>=</span> <span class='fl'>1</span><span class='op'>:</span><span class='fl'>5</span><span class='op'>)</span>, <span class='va'>tf2</span>, compression <span class='op'>=</span> <span class='st'>"gzip"</span>, compression_level <span class='op'>=</span> <span class='fl'>5</span><span class='op'>)</span> |
| <span class='op'>}</span> |
| <span class='op'>}</span> |
| </div></pre> |
| </div> |
| <div class="col-md-3 hidden-xs hidden-sm" id="pkgdown-sidebar"> |
| <nav id="toc" data-toggle="toc" class="sticky-top"> |
| <h2 data-toc-skip>Contents</h2> |
| </nav> |
| </div> |
| </div> |
| |
| |
| <footer> |
| <div class="copyright"> |
| <p>Developed by Neal Richardson, Ian Cook, Jonathan Keane, Romain François, Jeroen Ooms, Apache Arrow.</p> |
| </div> |
| |
| <div class="pkgdown"> |
| <p>Site built with <a href="https://pkgdown.r-lib.org/">pkgdown</a> 1.6.1.</p> |
| </div> |
| |
| </footer> |
| </div> |
| |
| |
| |
| |
| <script type="text/javascript" src="/docs/_static/versionwarning.js"></script> </body> |
| </html> |
| |
| |