| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| #' Dataset file formats |
| #' |
| #' @description |
| #' A `FileFormat` holds information about how to read and parse the files |
#' included in a `Dataset`. There are subclasses corresponding to the supported
#' file formats (`ParquetFileFormat`, `IpcFileFormat`, `CsvFileFormat`, and
#' `JsonFileFormat`).
| #' |
| #' @section Factory: |
| #' `FileFormat$create()` takes the following arguments: |
| #' * `format`: A string identifier of the file format. Currently supported values: |
| #' * "parquet" |
| #' * "ipc"/"arrow"/"feather", all aliases for each other; for Feather, note that |
| #' only version 2 files are supported |
| #' * "csv"/"text", aliases for the same thing (because comma is the default |
| #' delimiter for text files |
| #' * "tsv", equivalent to passing `format = "text", delimiter = "\t"` |
| #' * `...`: Additional format-specific options |
| #' |
| #' `format = "parquet"`: |
| #' * `dict_columns`: Names of columns which should be read as dictionaries. |
| #' * Any Parquet options from [FragmentScanOptions]. |
| #' |
| #' `format = "text"`: see [CsvParseOptions]. Note that you can specify them either |
| #' with the Arrow C++ library naming ("delimiter", "quoting", etc.) or the |
| #' `readr`-style naming used in [read_csv_arrow()] ("delim", "quote", etc.). |
| #' Not all `readr` options are currently supported; please file an issue if |
#' you encounter one that `arrow` should support. In addition, the following
#' options from [CsvReadOptions] are supported:
| #' * `skip_rows` |
#' * `column_names`. Note that if a [Schema] is specified, `column_names` must
#'   match the field names in the schema.
| #' * `autogenerate_column_names` |
| #' From [CsvFragmentScanOptions] (these values can be overridden at scan time): |
| #' * `convert_options`: a [CsvConvertOptions] |
| #' * `block_size` |
| #' |
#' It returns the appropriate subclass of `FileFormat` (e.g. `ParquetFileFormat`).
| #' @rdname FileFormat |
| #' @name FileFormat |
| #' @examplesIf arrow_with_dataset() |
#' ## Semicolon-delimited files
| #' # Set up directory for examples |
| #' tf <- tempfile() |
| #' dir.create(tf) |
| #' on.exit(unlink(tf)) |
| #' write.table(mtcars, file.path(tf, "file1.txt"), sep = ";", row.names = FALSE) |
| #' |
| #' # Create FileFormat object |
| #' format <- FileFormat$create(format = "text", delimiter = ";") |
| #' |
| #' open_dataset(tf, format = format) |
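#'
#' ## Parquet format with dictionary-encoded columns
#' # An illustrative sketch: creating the format object alone reads no files
#' parquet_format <- FileFormat$create("parquet", dict_columns = "cyl")
#' parquet_format$type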
| #' @export |
| FileFormat <- R6Class("FileFormat", |
| inherit = ArrowObject, |
| active = list( |
| # @description |
| # Return the `FileFormat`'s type |
| type = function() dataset___FileFormat__type_name(self) |
| ) |
| ) |
| FileFormat$create <- function(format, schema = NULL, ...) { |
| opt_names <- names(list(...)) |
| if (format %in% c("csv", "text") || any(opt_names %in% c("delim", "delimiter"))) { |
| CsvFileFormat$create(schema = schema, ...) |
| } else if (format == "tsv") { |
| CsvFileFormat$create(delimiter = "\t", schema = schema, ...) |
| } else if (format == "parquet") { |
| ParquetFileFormat$create(...) |
| } else if (format %in% c("ipc", "arrow", "feather")) { # These are aliases for the same thing |
| dataset___IpcFileFormat__Make() |
| } else if (format == "json") { |
| JsonFileFormat$create(...) |
| } else { |
| stop("Unsupported file format: ", format, call. = FALSE) |
| } |
| } |
| |
| #' @export |
| as.character.FileFormat <- function(x, ...) { |
| out <- x$type |
  # Slight hack: special-case IPC -> feather; otherwise it is just the type_name
| ifelse(out == "ipc", "feather", out) |
| } |
| |
| #' @usage NULL |
| #' @format NULL |
| #' @rdname FileFormat |
| #' @export |
| ParquetFileFormat <- R6Class("ParquetFileFormat", inherit = FileFormat) |
| ParquetFileFormat$create <- function(..., |
| dict_columns = character(0)) { |
| options <- ParquetFragmentScanOptions$create(...) |
| dataset___ParquetFileFormat__Make(options, dict_columns) |
| } |
| |
| #' @usage NULL |
| #' @format NULL |
| #' @rdname FileFormat |
| #' @export |
| IpcFileFormat <- R6Class("IpcFileFormat", inherit = FileFormat) |
| |
| #' JSON dataset file format |
| #' |
| #' @description |
| #' A `JsonFileFormat` is a [FileFormat] subclass which holds information about how to |
| #' read and parse the files included in a JSON `Dataset`. |
| #' |
| #' @section Factory: |
#' `JsonFileFormat$create()` can take options in the form of lists passed through as the
#' `parse_options` or `read_options` parameters.
| #' |
| #' Available `read_options` parameters: |
| #' * `use_threads`: Whether to use the global CPU thread pool. Default `TRUE`. If `FALSE`, JSON input must end with an |
| #' empty line. |
| #' * `block_size`: Block size we request from the IO layer; also determines size of chunks when `use_threads` |
| #' is `TRUE`. |
| #' |
| #' Available `parse_options` parameters: |
#' * `newlines_in_values`: Logical: are values allowed to contain CR (`0x0d` or `\r`) and LF
#'   (`0x0a` or `\n`) characters? (default `FALSE`)
| #' |
| #' @return A `JsonFileFormat` object |
| #' @rdname JsonFileFormat |
| #' @name JsonFileFormat |
| #' @seealso [FileFormat] |
| #' @examplesIf arrow_with_dataset() |
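#' # An illustrative sketch: write small newline-delimited JSON files and open
#' # them as a Dataset with a JsonFileFormat (parse/read options could also be
#' # passed to `create()`)
#' tf <- tempfile()
#' dir.create(tf)
#' on.exit(unlink(tf))
#' writeLines('{"x": 1, "y": true}', file.path(tf, "file1.json"))
#' writeLines('{"x": 2, "y": false}', file.path(tf, "file2.json"))
#'
#' format <- JsonFileFormat$create()
#' open_dataset(tf, format = format)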
| #' |
| #' @export |
| JsonFileFormat <- R6Class("JsonFileFormat", inherit = FileFormat) |
| JsonFileFormat$create <- function(...) { |
| dots <- list2(...) |
| parse_opt_choices <- dots[names(dots) %in% names(formals(JsonParseOptions$create))] |
| read_opt_choices <- dots[names(dots) %in% names(formals(JsonReadOptions$create))] |
| |
| parse_options <- do.call(JsonParseOptions$create, parse_opt_choices) |
| read_options <- do.call(JsonReadOptions$create, read_opt_choices) |
| dataset___JsonFileFormat__Make(parse_options, read_options) |
| } |
| |
| |
| #' CSV dataset file format |
| #' |
| #' @description |
#' A `CsvFileFormat` is a [FileFormat] subclass which holds information about how to
| #' read and parse the files included in a CSV `Dataset`. |
| #' |
| #' @section Factory: |
#' `CsvFileFormat$create()` can take options in the form of lists passed through as the `parse_options`,
#' `read_options`, or `convert_options` parameters. Alternatively, readr-style options can be passed
#' through individually. While it is possible to pass in [CsvReadOptions], [CsvConvertOptions], and
#' [CsvParseOptions] objects, this is not recommended as options set in these objects are not validated
#' for compatibility.
| #' |
| #' @return A `CsvFileFormat` object |
| #' @rdname CsvFileFormat |
| #' @name CsvFileFormat |
| #' @seealso [FileFormat] |
| #' @examplesIf arrow_with_dataset() |
| #' # Set up directory for examples |
| #' tf <- tempfile() |
| #' dir.create(tf) |
| #' on.exit(unlink(tf)) |
| #' df <- data.frame(x = c("1", "2", "NULL")) |
| #' write.table(df, file.path(tf, "file1.txt"), sep = ",", row.names = FALSE) |
| #' |
| #' # Create CsvFileFormat object with Arrow-style null_values option |
| #' format <- CsvFileFormat$create(convert_options = list(null_values = c("", "NA", "NULL"))) |
| #' open_dataset(tf, format = format) |
| #' |
| #' # Use readr-style options |
| #' format <- CsvFileFormat$create(na = c("", "NA", "NULL")) |
| #' open_dataset(tf, format = format) |
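#'
#' # Pass Arrow-style options as lists (an illustrative sketch)
#' format <- CsvFileFormat$create(
#'   parse_options = list(delimiter = ","),
#'   convert_options = list(null_values = c("", "NA", "NULL"))
#' )
#' open_dataset(tf, format = format)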
| #' |
| #' @export |
| CsvFileFormat <- R6Class("CsvFileFormat", inherit = FileFormat) |
| CsvFileFormat$create <- function(...) { |
| dots <- list(...) |
| options <- check_csv_file_format_args(dots) |
| check_schema(options[["schema"]], options[["read_options"]]$column_names) |
| |
| dataset___CsvFileFormat__Make(options$parse_options, options$convert_options, options$read_options) |
| } |
| |
| # Check all arguments are valid |
| check_csv_file_format_args <- function(args) { |
| options <- list( |
| parse_options = args$parse_options, |
| convert_options = args$convert_options, |
| read_options = args$read_options, |
| schema = args$schema |
| ) |
| |
| check_unsupported_args(args) |
| check_unrecognised_args(args) |
| |
| # Evaluate parse_options first to catch any unsupported arguments |
| if (is.null(args$parse_options)) { |
| options$parse_options <- do.call(csv_file_format_parse_opts, args) |
| } else if (is.list(args$parse_options)) { |
| options$parse_options <- do.call(CsvParseOptions$create, args$parse_options) |
| } |
| |
| if (is.null(args$convert_options)) { |
| options$convert_options <- do.call(csv_file_format_convert_opts, args) |
| } else if (is.list(args$convert_options)) { |
| options$convert_options <- do.call(CsvConvertOptions$create, args$convert_options) |
| } |
| |
| if (is.null(args$read_options)) { |
| options$read_options <- do.call(csv_file_format_read_opts, args) |
| } else if (is.list(args$read_options)) { |
| options$read_options <- do.call(CsvReadOptions$create, args$read_options) |
| } |
| |
| options |
| } |
| |
| check_unsupported_args <- function(args) { |
| opt_names <- get_opt_names(args) |
| |
| # Filter out arguments meant for CsvConvertOptions/CsvReadOptions |
| supported_convert_opts <- c(names(formals(CsvConvertOptions$create)), "na") |
| |
| supported_read_opts <- c( |
| names(formals(CsvReadOptions$create)), |
| names(formals(readr_to_csv_read_options)) |
| ) |
| |
  # For parse options, all of the readr-style option names are currently supported
| supported_parse_opts <- c( |
| names(formals(CsvParseOptions$create)), |
| names(formals(readr_to_csv_parse_options)) |
| ) |
| |
| # Catch any readr-style options specified with full option names that are |
| # supported by read_delim_arrow() (and its wrappers) but are not yet |
| # supported here |
| unsup_readr_opts <- setdiff( |
| names(formals(read_delim_arrow)), |
| c(supported_convert_opts, supported_read_opts, supported_parse_opts, "schema") |
| ) |
| |
| is_unsup_opt <- opt_names %in% unsup_readr_opts |
| unsup_opts <- opt_names[is_unsup_opt] |
| if (length(unsup_opts)) { |
| stop( |
| "The following ", |
| ngettext(length(unsup_opts), "option is ", "options are "), |
| "supported in \"read_delim_arrow\" functions ", |
| "but not yet supported here: ", |
| oxford_paste(unsup_opts), |
| call. = FALSE |
| ) |
| } |
| } |
| |
| # unlists "parse_options", "convert_options", "read_options" and returns them along with |
| # names of options passed in individually via args. `get_opt_names()` ignores any |
| # CSV*Options objects passed in as these are not validated - users must ensure they've |
| # chosen reasonable values in this case. |
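# For example (illustrative):
#   get_opt_names(list(delim = ";", read_options = list(skip_rows = 1)))
#   # returns c("delim", "skip_rows")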
| get_opt_names <- function(args) { |
| opt_names <- names(args) |
| |
| # extract names of parse_options, read_options, and convert_options |
| if ("parse_options" %in% names(args) && is.list(args[["parse_options"]])) { |
| opt_names <- c(opt_names, names(args[["parse_options"]])) |
| } |
| |
| if ("read_options" %in% names(args) && is.list(args[["read_options"]])) { |
| opt_names <- c(opt_names, names(args[["read_options"]])) |
| } |
| |
| if ("convert_options" %in% names(args) && is.list(args[["convert_options"]])) { |
| opt_names <- c(opt_names, names(args[["convert_options"]])) |
| } |
| |
| setdiff(opt_names, c("parse_options", "read_options", "convert_options")) |
| } |
| |
| check_unrecognised_args <- function(opts) { |
| # Catch any options with full or partial names that do not match any of the |
| # recognized Arrow C++ option names or readr-style option names |
| opt_names <- get_opt_names(opts) |
| |
| arrow_opts <- c( |
| names(formals(CsvParseOptions$create)), |
| names(formals(CsvReadOptions$create)), |
| names(formals(CsvConvertOptions$create)), |
| "schema" |
| ) |
| |
| readr_opts <- c( |
| names(formals(readr_to_csv_parse_options)), |
| names(formals(readr_to_csv_read_options)), |
| "na" |
| ) |
| |
| is_arrow_opt <- !is.na(pmatch(opt_names, arrow_opts)) |
| is_readr_opt <- !is.na(pmatch(opt_names, readr_opts)) |
| unrec_opts <- opt_names[!is_arrow_opt & !is_readr_opt] |
| if (length(unrec_opts)) { |
| stop( |
| "Unrecognized ", |
| ngettext(length(unrec_opts), "option", "options"), |
| ": ", |
| oxford_paste(unrec_opts), |
| call. = FALSE |
| ) |
| } |
| } |
| |
| check_ambiguous_options <- function(passed_opts, opts1, opts2) { |
| is_ambig_opt <- is.na(pmatch(passed_opts, c(opts1, opts2))) |
| ambig_opts <- passed_opts[is_ambig_opt] |
| if (length(ambig_opts)) { |
| stop("Ambiguous ", |
| ngettext(length(ambig_opts), "option", "options"), |
| ": ", |
| oxford_paste(ambig_opts), |
| ". Use full argument names", |
| call. = FALSE |
| ) |
| } |
| } |
| |
| check_schema <- function(schema, column_names) { |
| if (!is.null(schema) && !inherits(schema, "Schema")) { |
| abort(paste0( |
| "`schema` must be an object of class 'Schema' not '", |
| class(schema)[1], |
| "'." |
| )) |
| } |
| |
| schema_names <- names(schema) |
| |
| if (!is.null(schema) && !identical(schema_names, column_names)) { |
| missing_from_schema <- setdiff(column_names, schema_names) |
| missing_from_colnames <- setdiff(schema_names, column_names) |
| message_colnames <- NULL |
| message_schema <- NULL |
| message_order <- NULL |
| |
| if (length(missing_from_colnames) > 0) { |
| message_colnames <- paste( |
| oxford_paste(missing_from_colnames, quote_symbol = "`"), |
| "not present in `column_names`" |
| ) |
| } |
| |
| if (length(missing_from_schema) > 0) { |
| message_schema <- paste( |
| oxford_paste(missing_from_schema, quote_symbol = "`"), |
| "not present in `schema`" |
| ) |
| } |
| |
| if (length(missing_from_schema) == 0 && length(missing_from_colnames) == 0) { |
| message_order <- "`column_names` and `schema` field names match but are not in the same order" |
| } |
| |
| abort( |
| c( |
| "Values in `column_names` must match `schema` field names", |
| x = message_order, |
| x = message_schema, |
| x = message_colnames |
| ) |
| ) |
| } |
| } |
| |
| # Support both readr-style option names and Arrow C++ option names |
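# For example (illustrative), these two calls produce equivalent CsvParseOptions:
#   csv_file_format_parse_opts(delim = ";")
#   csv_file_format_parse_opts(delimiter = ";")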
| csv_file_format_parse_opts <- function(...) { |
| opts <- list(...) |
| # Filter out arguments meant for CsvConvertOptions/CsvReadOptions |
| convert_opts <- c(names(formals(CsvConvertOptions$create)), "na", "convert_options") |
| read_opts <- c( |
| names(formals(CsvReadOptions$create)), |
| names(formals(readr_to_csv_read_options)), |
| "read_options" |
| ) |
| opts[convert_opts] <- NULL |
| opts[read_opts] <- NULL |
| opts[["schema"]] <- NULL |
| opts[["parse_options"]] <- NULL |
| opt_names <- get_opt_names(opts) |
| |
| arrow_opts <- c(names(formals(CsvParseOptions$create))) |
| readr_opts <- c(names(formals(readr_to_csv_parse_options))) |
| |
| is_arrow_opt <- !is.na(pmatch(opt_names, arrow_opts)) |
| is_readr_opt <- !is.na(pmatch(opt_names, readr_opts)) |
| # Catch options with ambiguous partial names (such as "del") that make it |
| # unclear whether the user is specifying Arrow C++ options ("delimiter") or |
| # readr-style options ("delim") |
| check_ambiguous_options(opt_names, arrow_opts, readr_opts) |
| |
| if (any(is_readr_opt)) { |
| # Catch cases when the user specifies a mix of Arrow C++ options and |
| # readr-style options |
| if (!all(is_readr_opt)) { |
| stop("Use either Arrow parse options or readr parse options, not both", |
| call. = FALSE |
| ) |
| } |
| do.call(readr_to_csv_parse_options, opts) # all options have readr-style names |
| } else { |
| do.call(CsvParseOptions$create, opts) # all options have Arrow C++ names |
| } |
| } |
| |
| csv_file_format_convert_opts <- function(...) { |
| opts <- list(...) |
| # Filter out arguments meant for CsvParseOptions/CsvReadOptions |
| arrow_opts <- c(names(formals(CsvParseOptions$create)), "parse_options") |
| readr_opts <- names(formals(readr_to_csv_parse_options)) |
| read_opts <- c( |
| names(formals(CsvReadOptions$create)), |
| names(formals(readr_to_csv_read_options)), |
| "read_options" |
| ) |
| opts[arrow_opts] <- NULL |
| opts[readr_opts] <- NULL |
| opts[read_opts] <- NULL |
| opts[["schema"]] <- NULL |
| opts[["convert_options"]] <- NULL |
| |
| # map "na" to "null_values" |
| if ("na" %in% names(opts)) { |
| opts[["null_values"]] <- opts[["na"]] |
| opts[["na"]] <- NULL |
| } |
| |
| do.call(CsvConvertOptions$create, opts) |
| } |
| |
| csv_file_format_read_opts <- function(schema = NULL, ...) { |
| opts <- list(...) |
| # Filter out arguments meant for CsvParseOptions/CsvConvertOptions |
| arrow_opts <- c(names(formals(CsvParseOptions$create)), "parse_options") |
| readr_opts <- names(formals(readr_to_csv_parse_options)) |
| convert_opts <- c(names(formals(CsvConvertOptions$create)), "na", "convert_options") |
| opts[arrow_opts] <- NULL |
| opts[readr_opts] <- NULL |
| opts[convert_opts] <- NULL |
| opts[["read_options"]] <- NULL |
| |
| opt_names <- names(opts) |
| arrow_opts <- c(names(formals(CsvReadOptions$create))) |
| readr_opts <- c(names(formals(readr_to_csv_read_options))) |
| |
| is_arrow_opt <- !is.na(match(opt_names, arrow_opts)) |
| is_readr_opt <- !is.na(match(opt_names, readr_opts)) |
| |
| check_ambiguous_options(opt_names, arrow_opts, readr_opts) |
| |
| null_or_true <- function(x) { |
| is.null(x) || isTRUE(x) |
| } |
| |
| if (!is.null(schema) && null_or_true(opts[["column_names"]]) && null_or_true(opts[["col_names"]])) { |
| if (any(is_readr_opt)) { |
| opts[["col_names"]] <- names(schema) |
| } else { |
| opts[["column_names"]] <- names(schema) |
| } |
| } |
| |
| if (any(is_readr_opt)) { |
| # Catch cases when the user specifies a mix of Arrow C++ options and |
| # readr-style options |
| if (!all(is_readr_opt)) { |
| abort(c( |
| "Additional CSV reading options must be Arrow-style or readr-style, but not both.", |
| i = sprintf("Arrow options used: %s.", oxford_paste(opt_names[is_arrow_opt])), |
| i = sprintf("readr options used: %s.", oxford_paste(opt_names[is_readr_opt])) |
| )) |
| } |
| do.call(readr_to_csv_read_options, opts) # all options have readr-style names |
| } else { |
| do.call(CsvReadOptions$create, opts) # all options have Arrow C++ names |
| } |
| } |
| |
| #' Format-specific scan options |
| #' |
| #' @description |
| #' A `FragmentScanOptions` holds options specific to a `FileFormat` and a scan |
| #' operation. |
| #' |
| #' @section Factory: |
| #' `FragmentScanOptions$create()` takes the following arguments: |
| #' * `format`: A string identifier of the file format. Currently supported values: |
| #' * "parquet" |
| #' * "csv"/"text", aliases for the same format. |
| #' * `...`: Additional format-specific options |
| #' |
| #' `format = "parquet"`: |
| #' * `use_buffered_stream`: Read files through buffered input streams rather than |
| #' loading entire row groups at once. This may be enabled |
| #' to reduce memory overhead. Disabled by default. |
#' * `buffer_size`: Size of the buffered stream, if enabled. Default is 8196 bytes.
#' * `pre_buffer`: Pre-buffer the raw Parquet data. This can improve performance
#'   on high-latency filesystems. Enabled by default.
| # |
| #' `format = "text"`: see [CsvConvertOptions]. Note that options can only be |
| #' specified with the Arrow C++ library naming. Also, "block_size" from |
| #' [CsvReadOptions] may be given. |
| #' |
| #' It returns the appropriate subclass of `FragmentScanOptions` |
| #' (e.g. `CsvFragmentScanOptions`). |
| #' @rdname FragmentScanOptions |
| #' @name FragmentScanOptions |
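#' @examplesIf arrow_with_dataset()
#' # An illustrative sketch: Parquet scan options with buffered stream reads
#' fso <- FragmentScanOptions$create("parquet", use_buffered_stream = TRUE)
#' fso$type
#'
#' # CSV scan options using Arrow-style conversion options
#' csv_fso <- FragmentScanOptions$create("text", null_values = c("", "NA", "NULL"))
#' csv_fso$type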
| #' @export |
| FragmentScanOptions <- R6Class("FragmentScanOptions", |
| inherit = ArrowObject, |
| active = list( |
| # @description |
| # Return the `FragmentScanOptions`'s type |
| type = function() dataset___FragmentScanOptions__type_name(self) |
| ) |
| ) |
| FragmentScanOptions$create <- function(format, ...) { |
| if (format %in% c("csv", "text", "tsv")) { |
| CsvFragmentScanOptions$create(...) |
| } else if (format == "parquet") { |
| ParquetFragmentScanOptions$create(...) |
| } else if (format == "json") { |
| JsonFragmentScanOptions$create(...) |
| } else { |
| stop("Unsupported file format: ", format, call. = FALSE) |
| } |
| } |
| |
| #' @export |
| as.character.FragmentScanOptions <- function(x, ...) { |
| x$type |
| } |
| |
| #' @usage NULL |
| #' @format NULL |
| #' @rdname FragmentScanOptions |
| #' @export |
| CsvFragmentScanOptions <- R6Class("CsvFragmentScanOptions", inherit = FragmentScanOptions) |
| CsvFragmentScanOptions$create <- function(..., |
| convert_opts = csv_file_format_convert_opts(...), |
| read_opts = csv_file_format_read_opts(...)) { |
| dataset___CsvFragmentScanOptions__Make(convert_opts, read_opts) |
| } |
| |
| #' @usage NULL |
| #' @format NULL |
| #' @rdname FragmentScanOptions |
| #' @export |
| ParquetFragmentScanOptions <- R6Class("ParquetFragmentScanOptions", inherit = FragmentScanOptions) |
| ParquetFragmentScanOptions$create <- function(use_buffered_stream = FALSE, |
| buffer_size = 8196, |
| pre_buffer = TRUE) { |
| dataset___ParquetFragmentScanOptions__Make(use_buffered_stream, buffer_size, pre_buffer) |
| } |
| |
| #' @usage NULL |
| #' @format NULL |
| #' @rdname FragmentScanOptions |
| #' @export |
| JsonFragmentScanOptions <- R6Class("JsonFragmentScanOptions", inherit = FragmentScanOptions) |
| JsonFragmentScanOptions$create <- function(...) { |
| dots <- list2(...) |
| valid_parse_options <- names(formals(JsonParseOptions$create)) |
| valid_read_options <- names(formals(JsonReadOptions$create)) |
| valid_options <- c(valid_parse_options, valid_read_options) |
| |
| parse_opt_choices <- dots[names(dots) %in% valid_parse_options] |
| read_opt_choices <- dots[names(dots) %in% valid_read_options] |
| |
| if (length(setdiff(names(dots), valid_options)) > 0) { |
| abort( |
| c( |
| paste("`JsonFragmentScanOptions` must match one or more of:", oxford_paste(valid_options, quote_symbol = "`")), |
| i = paste("Invalid selection(s):", oxford_paste(setdiff(names(dots), valid_options), quote_symbol = "`")) |
| ) |
| ) |
| } |
| |
| parse_options <- do.call(JsonParseOptions$create, parse_opt_choices) |
| read_options <- do.call(JsonReadOptions$create, read_opt_choices) |
| |
| dataset___JsonFragmentScanOptions__Make(parse_options, read_options) |
| } |
| |
| #' Format-specific write options |
| #' |
| #' @description |
| #' A `FileWriteOptions` holds write options specific to a `FileFormat`. |
| FileWriteOptions <- R6Class("FileWriteOptions", |
| inherit = ArrowObject, |
| public = list( |
| update = function(column_names, ...) { |
| check_additional_args <- function(format, passed_args) { |
| if (format == "parquet") { |
| supported_args <- names(formals(write_parquet)) |
          # x and sink are not write options, so drop them
          supported_args <- setdiff(supported_args, c("x", "sink"))
| } else if (format == "ipc") { |
| supported_args <- c( |
| "use_legacy_format", |
| "metadata_version", |
| "codec", |
| "null_fallback" |
| ) |
| } else if (format == "csv") { |
| supported_args <- c( |
| names(formals(CsvWriteOptions$create)), |
| names(formals(readr_to_csv_write_options)) |
| ) |
| } |
| |
| unsupported_passed_args <- setdiff(passed_args, supported_args) |
| |
| if (length(unsupported_passed_args) > 0) { |
| err_header <- paste0( |
| oxford_paste(unsupported_passed_args, quote_symbol = "`"), |
| ngettext( |
| length(unsupported_passed_args), |
| " is not a valid argument ", |
| " are not valid arguments " |
| ), |
| "for your chosen `format`." |
| ) |
| err_info <- NULL |
| arg_info <- paste0( |
| "Supported arguments: ", |
| oxford_paste(unique(supported_args), quote_symbol = "`"), |
| "." |
| ) |
| if ("compression" %in% unsupported_passed_args) { |
| err_info <- "You could try using `codec` instead of `compression`." |
| } |
| abort(c(err_header, i = err_info, i = arg_info)) |
| } |
| } |
| |
| args <- list(...) |
| check_additional_args(self$type, names(args)) |
| |
| if (self$type == "parquet") { |
| dataset___ParquetFileWriteOptions__update( |
| self, |
| ParquetWriterProperties$create(column_names, ...), |
| ParquetArrowWriterProperties$create(...) |
| ) |
| } else if (self$type == "ipc") { |
| if (is.null(args$codec)) { |
| dataset___IpcFileWriteOptions__update1( |
| self, |
| get_ipc_use_legacy_format(args$use_legacy_format), |
| get_ipc_metadata_version(args$metadata_version) |
| ) |
| } else { |
| dataset___IpcFileWriteOptions__update2( |
| self, |
| get_ipc_use_legacy_format(args$use_legacy_format), |
| args$codec, |
| get_ipc_metadata_version(args$metadata_version) |
| ) |
| } |
| } else if (self$type == "csv") { |
| arrow_opts <- names(formals(CsvWriteOptions$create)) |
| readr_opts <- names(formals(readr_to_csv_write_options)) |
| readr_only_opts <- setdiff(readr_opts, arrow_opts) |
| |
| is_arrow_opt <- !is.na(pmatch(names(args), arrow_opts)) |
| is_readr_opt <- !is.na(pmatch(names(args), readr_opts)) |
| is_readr_only_opt <- !is.na(pmatch(names(args), readr_only_opts)) |
| |
| # These option names aren't mutually exclusive, so only use readr path |
| # if we have at least one readr-specific option. |
| if (sum(is_readr_only_opt)) { |
| dataset___CsvFileWriteOptions__update( |
| self, |
| do.call(readr_to_csv_write_options, args[is_readr_opt]) |
| ) |
| } else { |
| dataset___CsvFileWriteOptions__update( |
| self, |
| do.call(CsvWriteOptions$create, args[is_arrow_opt]) |
| ) |
| } |
| } |
| invisible(self) |
| } |
| ), |
| active = list( |
| type = function() dataset___FileWriteOptions__type_name(self) |
| ) |
| ) |
| FileWriteOptions$create <- function(format, ...) { |
| if (!inherits(format, "FileFormat")) { |
| format <- FileFormat$create(format) |
| } |
| options <- dataset___FileFormat__DefaultWriteOptions(format) |
| options$update(...) |
| } |