% blob: 124abdcb91281e12e4445ea1c31fea7e5b4b0b24 [file] [log] [blame]
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/csv.R
\name{read_delim_arrow}
\alias{read_delim_arrow}
\alias{read_csv_arrow}
\alias{read_tsv_arrow}
\title{Read a CSV or other delimited file with Arrow}
\usage{
read_delim_arrow(
file,
delim = ",",
quote = "\\"",
escape_double = TRUE,
escape_backslash = FALSE,
col_names = TRUE,
col_select = NULL,
na = c("", "NA"),
quoted_na = TRUE,
skip_empty_rows = TRUE,
skip = 0L,
parse_options = NULL,
convert_options = NULL,
read_options = NULL,
as_data_frame = TRUE
)
read_csv_arrow(
file,
quote = "\\"",
escape_double = TRUE,
escape_backslash = FALSE,
col_names = TRUE,
col_select = NULL,
na = c("", "NA"),
quoted_na = TRUE,
skip_empty_rows = TRUE,
skip = 0L,
parse_options = NULL,
convert_options = NULL,
read_options = NULL,
as_data_frame = TRUE
)
read_tsv_arrow(
file,
quote = "\\"",
escape_double = TRUE,
escape_backslash = FALSE,
col_names = TRUE,
col_select = NULL,
na = c("", "NA"),
quoted_na = TRUE,
skip_empty_rows = TRUE,
skip = 0L,
parse_options = NULL,
convert_options = NULL,
read_options = NULL,
as_data_frame = TRUE
)
}
\arguments{
\item{file}{A character file name, \code{raw} vector, or an Arrow input stream.
If a file name, a memory-mapped Arrow \link{InputStream} will be opened and
closed when finished; compression will be detected from the file extension
and handled automatically. If an input stream is provided, it will be left
open.}
\item{delim}{Single character used to separate fields within a record.}
\item{quote}{Single character used to quote strings.}
\item{escape_double}{Does the file escape quotes by doubling them?
That is, if this option is \code{TRUE}, the value \verb{""""} represents
a single quote, \verb{\\"}.}
\item{escape_backslash}{Does the file use backslashes to escape special
characters? This is more general than \code{escape_double} as backslashes
can be used to escape the delimiter character, the quote character, or
to add special characters like \verb{\\\\n}.}
\item{col_names}{If \code{TRUE}, the first row of the input will be used as the
column names and will not be included in the data frame. If \code{FALSE}, column
names will be generated by Arrow, starting with "f0", "f1", ..., "fN".
Alternatively, you can specify a character vector of column names.}
\item{col_select}{A character vector of column names to keep, as in the
"select" argument to \code{data.table::fread()}, or a
\link[tidyselect:vars_select]{tidy selection specification}
of columns, as used in \code{dplyr::select()}.}
\item{na}{A character vector of strings to interpret as missing values.}
\item{quoted_na}{Should missing values inside quotes be treated as missing
values (the default) or strings? (Note that this is different from
the Arrow C++ default for the corresponding convert option,
\code{strings_can_be_null}.)}
\item{skip_empty_rows}{Should blank rows be ignored altogether? If
\code{TRUE}, blank rows will not be represented at all. If \code{FALSE}, they will be
filled with missing values.}
\item{skip}{Number of lines to skip before reading data.}
\item{parse_options}{see \link[=CsvReadOptions]{file reader options}.
If given, this overrides any
parsing options provided in other arguments (e.g. \code{delim}, \code{quote}, etc.).}
\item{convert_options}{see \link[=CsvReadOptions]{file reader options}}
\item{read_options}{see \link[=CsvReadOptions]{file reader options}}
\item{as_data_frame}{Should the function return a \code{data.frame} (default) or
an Arrow \link{Table}?}
}
\value{
A \code{data.frame}, or a Table if \code{as_data_frame = FALSE}.
}
\description{
These functions use the Arrow C++ CSV reader to read into a \code{data.frame}.
Arrow C++ options have been mapped to argument names that follow those of
\code{readr::read_delim()}, and \code{col_select} was inspired by \code{vroom::vroom()}.
}
\details{
\code{read_csv_arrow()} and \code{read_tsv_arrow()} are wrappers around
\code{read_delim_arrow()} that specify a delimiter.
Note that not all \code{readr} options are currently implemented here. Please file
an issue if you encounter one that \code{arrow} should support.
If you need to control Arrow-specific reader parameters that don't have an
equivalent in \code{readr::read_csv()}, you can either provide them in the
\code{parse_options}, \code{convert_options}, or \code{read_options} arguments, or you can
use \link{CsvTableReader} directly for lower-level access.
}
\examples{
\donttest{
tf <- tempfile()
on.exit(unlink(tf))
write.csv(mtcars, file = tf)
df <- read_csv_arrow(tf)
dim(df)
# Can select columns
df <- read_csv_arrow(tf, col_select = starts_with("d"))
}
}