blob: a18ff959ce7e5b6c48f6d6130d010f9dc80030ee [file] [log] [blame]
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/csv.R, R/json.R
\docType{class}
\name{CsvReadOptions}
\alias{CsvReadOptions}
\alias{CsvWriteOptions}
\alias{CsvParseOptions}
\alias{TimestampParser}
\alias{CsvConvertOptions}
\alias{JsonReadOptions}
\alias{JsonParseOptions}
\title{File reader options}
\description{
\code{CsvReadOptions}, \code{CsvParseOptions}, \code{CsvConvertOptions},
\code{JsonReadOptions}, \code{JsonParseOptions}, and \code{TimestampParser} are containers for various
file reading options. See their usage in \code{\link[=read_csv_arrow]{read_csv_arrow()}} and
\code{\link[=read_json_arrow]{read_json_arrow()}}.
}
\section{Factory}{
The \code{CsvReadOptions$create()} and \code{JsonReadOptions$create()} factory methods
take the following arguments:
\itemize{
\item \code{use_threads} Whether to use the global CPU thread pool
\item \code{block_size} Block size we request from the IO layer; also determines
the size of chunks when \code{use_threads} is \code{TRUE}. NB: if \code{use_threads} is
\code{FALSE}, JSON input must end with an empty line.
}
\code{CsvReadOptions$create()} further accepts these additional arguments:
\itemize{
\item \code{skip_rows} Number of lines to skip before reading data (default 0).
\item \code{column_names} Character vector to supply column names. If length-0
(the default), the first non-skipped row will be parsed to generate column
names, unless \code{autogenerate_column_names} is \code{TRUE}.
\item \code{autogenerate_column_names} Logical: generate column names instead of
using the first non-skipped row (the default)? If \code{TRUE}, column names will
be "f0", "f1", ..., "fN".
\item \code{encoding} The file encoding. (default \code{"UTF-8"})
\item \code{skip_rows_after_names} Number of lines to skip after the column names (default 0).
This number can be larger than the number of rows in one block, and empty rows are counted.
The order of application is as follows:
\itemize{
\item \code{skip_rows} is applied (if non-zero);
\item column names are read (unless \code{column_names} is set);
\item \code{skip_rows_after_names} is applied (if non-zero).
}
}
\code{CsvParseOptions$create()} takes the following arguments:
\itemize{
\item \code{delimiter} Field delimiting character (default \code{","})
\item \code{quoting} Logical: are strings quoted? (default \code{TRUE})
\item \code{quote_char} Quoting character, if \code{quoting} is \code{TRUE}
\item \code{double_quote} Logical: are quotes inside values double-quoted? (default \code{TRUE})
\item \code{escaping} Logical: whether escaping is used (default \code{FALSE})
\item \code{escape_char} Escaping character, if \code{escaping} is \code{TRUE}
\item \code{newlines_in_values} Logical: are values allowed to contain CR (\code{0x0d})
and LF (\code{0x0a}) characters? (default \code{FALSE})
\item \code{ignore_empty_lines} Logical: should empty lines be ignored (default) or
generate a row of missing values (if \code{FALSE})?
}
\code{JsonParseOptions$create()} accepts only the \code{newlines_in_values} argument.
\code{CsvConvertOptions$create()} takes the following arguments:
\itemize{
\item \code{check_utf8} Logical: check UTF8 validity of string columns? (default \code{TRUE})
\item \code{null_values} character vector of recognized spellings for null values.
Analogous to the \code{na.strings} argument to
\code{\link[utils:read.table]{read.csv()}} or \code{na} in \code{\link[readr:read_delim]{readr::read_csv()}}.
\item \code{strings_can_be_null} Logical: can string / binary columns have
null values? Similar to the \code{quoted_na} argument to \code{\link[readr:read_delim]{readr::read_csv()}}.
(default \code{FALSE})
\item \code{true_values} character vector of recognized spellings for \code{TRUE} values
\item \code{false_values} character vector of recognized spellings for \code{FALSE} values
\item \code{col_types} A \code{Schema} or \code{NULL} to infer types
\item \code{auto_dict_encode} Logical: whether to try to automatically
dictionary-encode string / binary data (think \code{stringsAsFactors}). Default \code{FALSE}.
This setting is ignored for non-inferred columns (those in \code{col_types}).
\item \code{auto_dict_max_cardinality} If \code{auto_dict_encode}, string/binary columns
are dictionary-encoded up to this number of unique values (default 50),
after which it switches to regular encoding.
\item \code{include_columns} If non-empty, indicates the names of columns from the
CSV file that should be actually read and converted (in the vector's order).
\item \code{include_missing_columns} Logical: if \code{include_columns} is provided, should
columns named in it but not found in the data be included as a column of
type \code{null()}? The default (\code{FALSE}) means that the reader will instead
raise an error.
\item \code{timestamp_parsers} User-defined timestamp parsers. If more than one
parser is specified, the CSV conversion logic will try parsing values
starting from the beginning of this vector. Possible values are
(a) \code{NULL}, the default, which uses the ISO-8601 parser;
(b) a character vector of \link[base:strptime]{strptime} parse strings; or
(c) a list of \link{TimestampParser} objects.
}
\code{TimestampParser$create()} takes an optional \code{format} string argument.
See \code{\link[base:strptime]{strptime()}} for example syntax.
The default is to use an ISO-8601 format parser.
The \code{CsvWriteOptions$create()} factory method takes the following arguments:
\itemize{
\item \code{include_header} Whether to write an initial header line with column names
\item \code{batch_size} Maximum number of rows processed at a time. Default is 1024.
\item \code{null_string} The string to be written for null values. Must not contain
quotation marks. Default is an empty string (\code{""}).
}
}
\section{Active bindings}{
\itemize{
\item \code{column_names}: from \code{CsvReadOptions}
}
}