r/man/write_dataset.Rd - arrow - Git at Google

 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/dataset-write.R
 \name{write_dataset}
 \alias{write_dataset}
 \title{Write a dataset}
 \usage{
 write_dataset(
   dataset,
   path,
   format = c("parquet", "feather", "arrow", "ipc"),
   partitioning = dplyr::group_vars(dataset),
   basename_template = paste0("part-{i}.", as.character(format)),
   hive_style = TRUE,
   ...
 )
 }
 \arguments{
 \item{dataset}{\link{Dataset}, \link{RecordBatch}, \link{Table}, \code{arrow_dplyr_query}, or
 \code{data.frame}. If an \code{arrow_dplyr_query} or \code{grouped_df},
 \code{schema} and \code{partitioning} will be taken from the result of any \code{select()}
 and \code{group_by()} operations done on the dataset. \code{filter()} queries will be
 applied to restrict written rows.
 Note that \code{select()}-ed columns may not be renamed.}

 \item{path}{string path, URI, or \code{SubTreeFileSystem} referencing a directory
 to write to (directory will be created if it does not exist)}

 \item{format}{a string identifier of the file format. Default is to use
 "parquet" (see \link{FileFormat})}

 \item{partitioning}{\code{Partitioning} or a character vector of columns to
 use as partition keys (to be written as path segments). Default is to
 use the current \code{group_by()} columns.}

 \item{basename_template}{string template for the names of files to be written.
 Must contain \code{"{i}"}, which will be replaced with an autoincremented
 integer to generate basenames of datafiles. For example, \code{"part-{i}.feather"}
 will yield \verb{"part-0.feather", ...}.}

 \item{hive_style}{logical: write partition segments as Hive-style
 (\code{key1=value1/key2=value2/file.ext}) or as just bare values. Default is \code{TRUE}.}

 \item{...}{additional format-specific arguments. For available Parquet
 options, see \code{\link[=write_parquet]{write_parquet()}}. The available Feather options are
 \itemize{
 \item \code{use_legacy_format}: logical; write data formatted so that Arrow library
 versions 0.14 and lower can read it. Default is \code{FALSE}. You can also
 enable this by setting the environment variable \code{ARROW_PRE_0_15_IPC_FORMAT=1}.
 \item \code{metadata_version}: A string like "V5" or the equivalent integer indicating
 the Arrow IPC MetadataVersion. Default (NULL) will use the latest version,
 unless the environment variable \code{ARROW_PRE_1_0_METADATA_VERSION=1} is set, in
 which case it will be V4.
 \item \code{codec}: A \link{Codec} which will be used to compress body buffers of written
 files. Default (NULL) will not compress body buffers.
 \item \code{null_fallback}: character to be used in place of missing values (\code{NA} or
 \code{NULL}) when using Hive-style partitioning. See \code{\link[=hive_partition]{hive_partition()}}.
 }}
 }
 \value{
 The input \code{dataset}, invisibly
 }
 \description{
 This function allows you to write a dataset. By writing to more efficient
 binary storage formats, and by specifying relevant partitioning, you can
 make the data much faster to read and query.
 }
	% Generated by roxygen2: do not edit by hand
	% Please edit documentation in R/dataset-write.R
	\name{write_dataset}
	\alias{write_dataset}
	\title{Write a dataset}
	\usage{
	write_dataset(
	dataset,
	path,
	format = c("parquet", "feather", "arrow", "ipc"),
	partitioning = dplyr::group_vars(dataset),
	basename_template = paste0("part-{i}.", as.character(format)),
	hive_style = TRUE,
	...
	)
	}
	\arguments{
	\item{dataset}{\link{Dataset}, \link{RecordBatch}, \link{Table}, \code{arrow_dplyr_query}, or
	\code{data.frame}. If an \code{arrow_dplyr_query} or \code{grouped_df},
	\code{schema} and \code{partitioning} will be taken from the result of any \code{select()}
	and \code{group_by()} operations done on the dataset. \code{filter()} queries will be
	applied to restrict written rows.
	Note that \code{select()}-ed columns may not be renamed.}

	\item{path}{string path, URI, or \code{SubTreeFileSystem} referencing a directory
	to write to (directory will be created if it does not exist)}

	\item{format}{a string identifier of the file format. Default is to use
	"parquet" (see \link{FileFormat})}

	\item{partitioning}{\code{Partitioning} or a character vector of columns to
	use as partition keys (to be written as path segments). Default is to
	use the current \code{group_by()} columns.}

	\item{basename_template}{string template for the names of files to be written.
	Must contain \code{"{i}"}, which will be replaced with an autoincremented
	integer to generate basenames of datafiles. For example, \code{"part-{i}.feather"}
	will yield \verb{"part-0.feather", ...}.}

	\item{hive_style}{logical: write partition segments as Hive-style
	(\code{key1=value1/key2=value2/file.ext}) or as just bare values. Default is \code{TRUE}.}

	\item{...}{additional format-specific arguments. For available Parquet
	options, see \code{\link[=write_parquet]{write_parquet()}}. The available Feather options are
	\itemize{
	\item \code{use_legacy_format}: logical; write data formatted so that Arrow library
	versions 0.14 and lower can read it. Default is \code{FALSE}. You can also
	enable this by setting the environment variable \code{ARROW_PRE_0_15_IPC_FORMAT=1}.
	\item \code{metadata_version}: A string like "V5" or the equivalent integer indicating
	the Arrow IPC MetadataVersion. Default (NULL) will use the latest version,
	unless the environment variable \code{ARROW_PRE_1_0_METADATA_VERSION=1} is set, in
	which case it will be V4.
	\item \code{codec}: A \link{Codec} which will be used to compress body buffers of written
	files. Default (NULL) will not compress body buffers.
	\item \code{null_fallback}: character to be used in place of missing values (\code{NA} or
	\code{NULL}) when using Hive-style partitioning. See \code{\link[=hive_partition]{hive_partition()}}.
	}}
	}
	\value{
	The input \code{dataset}, invisibly
	}
	\description{
	This function allows you to write a dataset. By writing to more efficient
	binary storage formats, and by specifying relevant partitioning, you can
	make the data much faster to read and query.
	}