r/man/CsvReadOptions.Rd - arrow - Git at Google

 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/csv.R, R/json.R
 \docType{class}
 \name{CsvReadOptions}
 \alias{CsvReadOptions}
 \alias{CsvWriteOptions}
 \alias{CsvParseOptions}
 \alias{TimestampParser}
 \alias{CsvConvertOptions}
 \alias{JsonReadOptions}
 \alias{JsonParseOptions}
 \title{File reader options}
 \description{
 \code{CsvReadOptions}, \code{CsvParseOptions}, \code{CsvConvertOptions},
 \code{JsonReadOptions}, \code{JsonParseOptions}, and \code{TimestampParser} are containers for various
 file reading options. See their usage in \code{\link[=read_csv_arrow]{read_csv_arrow()}} and
 \code{\link[=read_json_arrow]{read_json_arrow()}}, respectively.
 }
 \section{Factory}{


 The \code{CsvReadOptions$create()} and \code{JsonReadOptions$create()} factory methods
 take the following arguments:
 \itemize{
 \item \code{use_threads} Whether to use the global CPU thread pool
 \item \code{block_size} Block size we request from the IO layer; also determines
 the size of chunks when \code{use_threads} is \code{TRUE}. NB: if \code{use_threads}
 is \code{FALSE}, JSON input must end with an empty line.
 }

 \code{CsvReadOptions$create()} further accepts these additional arguments:
 \itemize{
 \item \code{skip_rows} Number of lines to skip before reading data (default 0)
 \item \code{column_names} Character vector to supply column names. If length-0
 (the default), the first non-skipped row will be parsed to generate column
 names, unless \code{autogenerate_column_names} is \code{TRUE}.
 \item \code{autogenerate_column_names} Logical: generate column names instead of
 using the first non-skipped row? By default, the first non-skipped row is used.
 If \code{TRUE}, column names will be "f0", "f1", ..., "fN".
 }

 \code{CsvParseOptions$create()} takes the following arguments:
 \itemize{
 \item \code{delimiter} Field delimiting character (default \code{","})
 \item \code{quoting} Logical: are strings quoted? (default \code{TRUE})
 \item \code{quote_char} Quoting character, if \code{quoting} is \code{TRUE}
 \item \code{double_quote} Logical: are quotes inside values double-quoted? (default \code{TRUE})
 \item \code{escaping} Logical: whether escaping is used (default \code{FALSE})
 \item \code{escape_char} Escaping character, if \code{escaping} is \code{TRUE}
 \item \code{newlines_in_values} Logical: are values allowed to contain CR (\code{0x0d})
 and LF (\code{0x0a}) characters? (default \code{FALSE})
 \item \code{ignore_empty_lines} Logical: should empty lines be ignored (default) or
 generate a row of missing values (if \code{FALSE})?
 }

 \code{JsonParseOptions$create()} accepts only the \code{newlines_in_values} argument.

 \code{CsvConvertOptions$create()} takes the following arguments:
 \itemize{
 \item \code{check_utf8} Logical: check UTF8 validity of string columns? (default \code{TRUE})
 \item \code{null_values} character vector of recognized spellings for null values.
 Analogous to the \code{na.strings} argument to
 \code{\link[utils:read.table]{read.csv()}} or \code{na} in \code{readr::read_csv()}.
 \item \code{strings_can_be_null} Logical: can string / binary columns have
 null values? (default \code{FALSE}) Similar to the \code{quoted_na} argument to
 \code{readr::read_csv()}.
 \item \code{true_values} character vector of recognized spellings for \code{TRUE} values
 \item \code{false_values} character vector of recognized spellings for \code{FALSE} values
 \item \code{col_types} A \code{Schema} or \code{NULL} to infer types
 \item \code{auto_dict_encode} Logical: Whether to try to automatically
 dictionary-encode string / binary data (think \code{stringsAsFactors}). Default \code{FALSE}.
 This setting is ignored for non-inferred columns (those in \code{col_types}).
 \item \code{auto_dict_max_cardinality} If \code{auto_dict_encode}, string/binary columns
 are dictionary-encoded up to this number of unique values (default 50),
 after which it switches to regular encoding.
 \item \code{include_columns} If non-empty, indicates the names of columns from the
 CSV file that should be actually read and converted (in the vector's order).
 \item \code{include_missing_columns} Logical: if \code{include_columns} is provided, should
 columns named in it but not found in the data be included as a column of
 type \code{null()}? The default (\code{FALSE}) means that the reader will instead
 raise an error.
 \item \code{timestamp_parsers} User-defined timestamp parsers. If more than one
 parser is specified, the CSV conversion logic will try parsing values
 starting from the beginning of this vector. Possible values are
 (a) \code{NULL}, the default, which uses the ISO-8601 parser;
 (b) a character vector of \link[base:strptime]{strptime} parse strings; or
 (c) a list of \link{TimestampParser} objects.
 }

 \code{TimestampParser$create()} takes an optional \code{format} string argument.
 See \code{\link[base:strptime]{strptime()}} for example syntax.
 The default is to use an ISO-8601 format parser.

 The \code{CsvWriteOptions$create()} factory method takes the following arguments:
 \itemize{
 \item \code{include_header} Whether to write an initial header line with column names
 \item \code{batch_size} Maximum number of rows processed at a time. Default is 1024.
 }
 }

 \section{Active bindings}{

 \itemize{
 \item \code{column_names}: from \code{CsvReadOptions}
 }
 }
	% Generated by roxygen2: do not edit by hand
	% Please edit documentation in R/csv.R, R/json.R
	\docType{class}
	\name{CsvReadOptions}
	\alias{CsvReadOptions}
	\alias{CsvWriteOptions}
	\alias{CsvParseOptions}
	\alias{TimestampParser}
	\alias{CsvConvertOptions}
	\alias{JsonReadOptions}
	\alias{JsonParseOptions}
	\title{File reader options}
	\description{
	\code{CsvReadOptions}, \code{CsvParseOptions}, \code{CsvConvertOptions},
	\code{JsonReadOptions}, \code{JsonParseOptions}, and \code{TimestampParser} are containers for various
	file reading options. See their usage in \code{\link[=read_csv_arrow]{read_csv_arrow()}} and
	\code{\link[=read_json_arrow]{read_json_arrow()}}, respectively.
	}
	\section{Factory}{


	The \code{CsvReadOptions$create()} and \code{JsonReadOptions$create()} factory methods
	take the following arguments:
	\itemize{
	\item \code{use_threads} Whether to use the global CPU thread pool
	\item \code{block_size} Block size we request from the IO layer; also determines
	the size of chunks when \code{use_threads} is \code{TRUE}. NB: if \code{use_threads}
	is \code{FALSE}, JSON input must end with an empty line.
	}

	\code{CsvReadOptions$create()} further accepts these additional arguments:
	\itemize{
	\item \code{skip_rows} Number of lines to skip before reading data (default 0)
	\item \code{column_names} Character vector to supply column names. If length-0
	(the default), the first non-skipped row will be parsed to generate column
	names, unless \code{autogenerate_column_names} is \code{TRUE}.
	\item \code{autogenerate_column_names} Logical: generate column names instead of
	using the first non-skipped row? By default, the first non-skipped row is used.
	If \code{TRUE}, column names will be "f0", "f1", ..., "fN".
	}

	\code{CsvParseOptions$create()} takes the following arguments:
	\itemize{
	\item \code{delimiter} Field delimiting character (default \code{","})
	\item \code{quoting} Logical: are strings quoted? (default \code{TRUE})
	\item \code{quote_char} Quoting character, if \code{quoting} is \code{TRUE}
	\item \code{double_quote} Logical: are quotes inside values double-quoted? (default \code{TRUE})
	\item \code{escaping} Logical: whether escaping is used (default \code{FALSE})
	\item \code{escape_char} Escaping character, if \code{escaping} is \code{TRUE}
	\item \code{newlines_in_values} Logical: are values allowed to contain CR (\code{0x0d})
	and LF (\code{0x0a}) characters? (default \code{FALSE})
	\item \code{ignore_empty_lines} Logical: should empty lines be ignored (default) or
	generate a row of missing values (if \code{FALSE})?
	}

	\code{JsonParseOptions$create()} accepts only the \code{newlines_in_values} argument.

	\code{CsvConvertOptions$create()} takes the following arguments:
	\itemize{
	\item \code{check_utf8} Logical: check UTF8 validity of string columns? (default \code{TRUE})
	\item \code{null_values} character vector of recognized spellings for null values.
	Analogous to the \code{na.strings} argument to
	\code{\link[utils:read.table]{read.csv()}} or \code{na} in \code{readr::read_csv()}.
	\item \code{strings_can_be_null} Logical: can string / binary columns have
	null values? (default \code{FALSE}) Similar to the \code{quoted_na} argument to
	\code{readr::read_csv()}.
	\item \code{true_values} character vector of recognized spellings for \code{TRUE} values
	\item \code{false_values} character vector of recognized spellings for \code{FALSE} values
	\item \code{col_types} A \code{Schema} or \code{NULL} to infer types
	\item \code{auto_dict_encode} Logical: Whether to try to automatically
	dictionary-encode string / binary data (think \code{stringsAsFactors}). Default \code{FALSE}.
	This setting is ignored for non-inferred columns (those in \code{col_types}).
	\item \code{auto_dict_max_cardinality} If \code{auto_dict_encode}, string/binary columns
	are dictionary-encoded up to this number of unique values (default 50),
	after which it switches to regular encoding.
	\item \code{include_columns} If non-empty, indicates the names of columns from the
	CSV file that should be actually read and converted (in the vector's order).
	\item \code{include_missing_columns} Logical: if \code{include_columns} is provided, should
	columns named in it but not found in the data be included as a column of
	type \code{null()}? The default (\code{FALSE}) means that the reader will instead
	raise an error.
	\item \code{timestamp_parsers} User-defined timestamp parsers. If more than one
	parser is specified, the CSV conversion logic will try parsing values
	starting from the beginning of this vector. Possible values are
	(a) \code{NULL}, the default, which uses the ISO-8601 parser;
	(b) a character vector of \link[base:strptime]{strptime} parse strings; or
	(c) a list of \link{TimestampParser} objects.
	}

	\code{TimestampParser$create()} takes an optional \code{format} string argument.
	See \code{\link[base:strptime]{strptime()}} for example syntax.
	The default is to use an ISO-8601 format parser.

	The \code{CsvWriteOptions$create()} factory method takes the following arguments:
	\itemize{
	\item \code{include_header} Whether to write an initial header line with column names
	\item \code{batch_size} Maximum number of rows processed at a time. Default is 1024.
	}
	}

	\section{Active bindings}{

	\itemize{
	\item \code{column_names}: from \code{CsvReadOptions}
	}
	}