% blob: d088692708b49955a97b5ba7da1828c9c271df48 [file] [log] [blame]
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/csv.R, R/json.R
\docType{class}
\name{CsvReadOptions}
\alias{CsvReadOptions}
\alias{CsvWriteOptions}
\alias{CsvParseOptions}
\alias{TimestampParser}
\alias{CsvConvertOptions}
\alias{JsonReadOptions}
\alias{JsonParseOptions}
\title{File reader options}
\description{
\code{CsvReadOptions}, \code{CsvParseOptions}, \code{CsvConvertOptions},
\code{JsonReadOptions}, \code{JsonParseOptions}, and \code{TimestampParser} are containers for various
file reading options. See their usage in \code{\link[=read_csv_arrow]{read_csv_arrow()}} (the CSV options
and \code{TimestampParser}) and \code{\link[=read_json_arrow]{read_json_arrow()}} (the JSON options).
\code{CsvWriteOptions} is the corresponding container for CSV writing options.
}
\section{Factory}{
The \code{CsvReadOptions$create()} and \code{JsonReadOptions$create()} factory methods
take the following arguments:
\itemize{
\item \code{use_threads} Whether to use the global CPU thread pool
\item \code{block_size} Block size we request from the IO layer; also determines
the size of chunks when \code{use_threads} is \code{TRUE}. NB: if \code{use_threads}
is \code{FALSE}, JSON input must end with an empty line.
}
\code{CsvReadOptions$create()} further accepts these additional arguments:
\itemize{
\item \code{skip_rows} Number of lines to skip before reading data (default 0)
\item \code{column_names} Character vector to supply column names. If length-0
(the default), the first non-skipped row will be parsed to generate column
names, unless \code{autogenerate_column_names} is \code{TRUE}.
\item \code{autogenerate_column_names} Logical: generate column names instead of
using the first non-skipped row? The default (\code{FALSE}) is to use the first
non-skipped row. If \code{TRUE}, column names will be "f0", "f1", ..., "fN".
}
\code{CsvParseOptions$create()} takes the following arguments:
\itemize{
\item \code{delimiter} Field delimiting character (default \code{","})
\item \code{quoting} Logical: are strings quoted? (default \code{TRUE})
\item \code{quote_char} Quoting character, if \code{quoting} is \code{TRUE}
\item \code{double_quote} Logical: are quotes inside values double-quoted? (default \code{TRUE})
\item \code{escaping} Logical: whether escaping is used (default \code{FALSE})
\item \code{escape_char} Escaping character, if \code{escaping} is \code{TRUE}
\item \code{newlines_in_values} Logical: are values allowed to contain CR (\code{0x0d})
and LF (\code{0x0a}) characters? (default \code{FALSE})
\item \code{ignore_empty_lines} Logical: should empty lines be ignored (default) or
generate a row of missing values (if \code{FALSE})?
}
\code{JsonParseOptions$create()} accepts only the \code{newlines_in_values} argument.

\code{CsvConvertOptions$create()} takes the following arguments:
\itemize{
\item \code{check_utf8} Logical: check UTF8 validity of string columns? (default \code{TRUE})
\item \code{null_values} character vector of recognized spellings for null values.
Analogous to the \code{na.strings} argument to
\code{\link[utils:read.table]{read.csv()}} or \code{na} in \code{readr::read_csv()}.
\item \code{strings_can_be_null} Logical: can string / binary columns have
null values? Similar to the \code{quoted_na} argument to \code{readr::read_csv()}.
(default \code{FALSE})
\item \code{true_values} character vector of recognized spellings for \code{TRUE} values
\item \code{false_values} character vector of recognized spellings for \code{FALSE} values
\item \code{col_types} A \code{Schema} or \code{NULL} to infer types
\item \code{auto_dict_encode} Logical: Whether to try to automatically
dictionary-encode string / binary data (think \code{stringsAsFactors}). Default \code{FALSE}.
This setting is ignored for non-inferred columns (those in \code{col_types}).
\item \code{auto_dict_max_cardinality} If \code{auto_dict_encode}, string/binary columns
are dictionary-encoded up to this number of unique values (default 50),
after which it switches to regular encoding.
\item \code{include_columns} If non-empty, indicates the names of columns from the
CSV file that should actually be read and converted (in the vector's order).
\item \code{include_missing_columns} Logical: if \code{include_columns} is provided, should
columns named in it but not found in the data be included as a column of
type \code{null()}? The default (\code{FALSE}) means that the reader will instead
raise an error.
\item \code{timestamp_parsers} User-defined timestamp parsers. If more than one
parser is specified, the CSV conversion logic will try parsing values
starting from the beginning of this vector. Possible values are
(a) \code{NULL}, the default, which uses the ISO-8601 parser;
(b) a character vector of \link[base:strptime]{strptime} parse strings; or
(c) a list of \link{TimestampParser} objects.
}
\code{TimestampParser$create()} takes an optional \code{format} string argument.
See \code{\link[base:strptime]{strptime()}} for example syntax.
The default is to use an ISO-8601 format parser.

The \code{CsvWriteOptions$create()} factory method takes the following arguments:
\itemize{
\item \code{include_header} Whether to write an initial header line with column names
\item \code{batch_size} Maximum number of rows processed at a time. Default is 1024.
}
}
\section{Active bindings}{
\itemize{
\item \code{column_names}: from \code{CsvReadOptions}
}
}