# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#' @include arrow-package.R
#' @include array.R
#' @title RecordBatch class
#' @description A record batch is a collection of equal-length arrays matching
#' a particular [Schema]. It is a table-like data structure that is semantically
#' a sequence of [fields][Field], each a contiguous Arrow [Array].
#' @usage NULL
#' @format NULL
#' @docType class
#'
#' @section S3 Methods and Usage:
#' Record batches are data-frame-like, and many methods you expect to work on
#' a `data.frame` are implemented for `RecordBatch`. This includes `[`, `[[`,
#' `$`, `names`, `dim`, `nrow`, `ncol`, `head`, and `tail`. You can also pull
#' the data from an Arrow record batch into R with `as.data.frame()`. See the
#' examples.
#'
#' A caveat about the `$` method: because `RecordBatch` is an `R6` object,
#' `$` is also used to access the object's methods (see below). Methods take
#' precedence over the batch's columns. So, `batch$Slice` would return the
#' "Slice" method function even if there were a column in the batch called
#' "Slice".
#'
#' @section R6 Methods:
#' In addition to the more R-friendly S3 methods, a `RecordBatch` object has
#' the following R6 methods that map onto the underlying C++ methods:
#'
#' - `$Equals(other, check_metadata = FALSE)`: Returns `TRUE` if the `other`
#' record batch is equal to this one, optionally comparing schema metadata as well
#' - `$column(i)`: Extract an `Array` by integer position from the batch
#' - `$column_name(i)`: Get a column's name by integer position
#' - `$names()`: Get all column names (called by `names(batch)`)
#' - `$GetColumnByName(name)`: Extract an `Array` by string name
#' - `$RemoveColumn(i)`: Drops a column from the batch by integer position
#' - `$SelectColumns(indices)`: Return a new record batch with a selection of columns, expressed as 0-based integers.
#' - `$Slice(offset, length = NULL)`: Create a zero-copy view starting at the
#' indicated integer offset and going for the given length, or to the end
#' of the batch if `NULL`, the default.
#' - `$Take(i)`: return a `RecordBatch` with rows at positions given by
#' integers (R vector or Arrow Array) `i`.
#' - `$Filter(i, keep_na = TRUE)`: return a `RecordBatch` with rows at positions where the logical
#' vector (or Arrow boolean Array) `i` is `TRUE`.
#' - `$serialize()`: Returns a raw vector suitable for interprocess communication
#' - `$cast(target_schema, safe = TRUE, options = cast_options(safe))`: Alter
#' the schema of the record batch.
#'
#' There are also some active bindings:
#' - `$num_columns`
#' - `$num_rows`
#' - `$schema`
#' - `$metadata`: Returns the key-value metadata of the `Schema` as a named list.
#' Modify or replace by assigning in (`batch$metadata <- new_metadata`).
#' All list elements are coerced to string.
#' - `$columns`: Returns a list of `Array`s
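#'
#' A brief sketch of a few of these in use (assuming `batch <- record_batch(mtcars)`,
#' as in the examples below):
#'
#' ```
#' batch$num_rows                 # 32
#' batch$Slice(0, 5)              # zero-copy view of the first 5 rows
#' batch$Take(c(0, 2, 4))         # rows at the given 0-based positions
#' batch$GetColumnByName("mpg")   # a single Array
#' ```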
#' @rdname RecordBatch
#' @name RecordBatch
RecordBatch <- R6Class("RecordBatch", inherit = ArrowObject,
  public = list(
    column = function(i) shared_ptr(Array, RecordBatch__column(self, i)),
    column_name = function(i) RecordBatch__column_name(self, i),
    names = function() RecordBatch__names(self),
    Equals = function(other, check_metadata = FALSE, ...) {
      inherits(other, "RecordBatch") && RecordBatch__Equals(self, other, isTRUE(check_metadata))
    },
    GetColumnByName = function(name) {
      assert_that(is.string(name))
      shared_ptr(Array, RecordBatch__GetColumnByName(self, name))
    },
    SelectColumns = function(indices) {
      shared_ptr(RecordBatch, RecordBatch__SelectColumns(self, indices))
    },
    RemoveColumn = function(i) {
      shared_ptr(RecordBatch, RecordBatch__RemoveColumn(self, i))
    },
    Slice = function(offset, length = NULL) {
      if (is.null(length)) {
        shared_ptr(RecordBatch, RecordBatch__Slice1(self, offset))
      } else {
        shared_ptr(RecordBatch, RecordBatch__Slice2(self, offset, length))
      }
    },
    Take = function(i) {
      if (is.numeric(i)) {
        i <- as.integer(i)
      }
      if (is.integer(i)) {
        i <- Array$create(i)
      }
      assert_is(i, "Array")
      shared_ptr(RecordBatch, call_function("take", self, i))
    },
    Filter = function(i, keep_na = TRUE) {
      if (is.logical(i)) {
        i <- Array$create(i)
      }
      assert_that(is.Array(i, "bool"))
      shared_ptr(RecordBatch, call_function("filter", self, i, options = list(keep_na = keep_na)))
    },
    serialize = function() ipc___SerializeRecordBatch__Raw(self),
    ToString = function() ToString_tabular(self),
    cast = function(target_schema, safe = TRUE, options = cast_options(safe)) {
      assert_is(target_schema, "Schema")
      assert_is(options, "CastOptions")
      assert_that(identical(self$schema$names, target_schema$names), msg = "incompatible schemas")
      shared_ptr(RecordBatch, RecordBatch__cast(self, target_schema, options))
    }
  ),
  active = list(
    num_columns = function() RecordBatch__num_columns(self),
    num_rows = function() RecordBatch__num_rows(self),
    schema = function() shared_ptr(Schema, RecordBatch__schema(self)),
    metadata = function(new) {
      if (missing(new)) {
        # Get the metadata (from the schema)
        self$schema$metadata
      } else {
        # Set the metadata
        new <- prepare_key_value_metadata(new)
        out <- RecordBatch__ReplaceSchemaMetadata(self, new)
        # ReplaceSchemaMetadata returns a new object but we're modifying in place,
        # so swap in that new C++ object pointer into our R6 object
        self$set_pointer(out)
        self
      }
    },
    columns = function() map(RecordBatch__columns(self), shared_ptr, Array)
  )
)
RecordBatch$create <- function(..., schema = NULL) {
  arrays <- list2(...)
  if (length(arrays) == 1 && inherits(arrays[[1]], c("raw", "Buffer", "InputStream", "Message"))) {
    return(RecordBatch$from_message(arrays[[1]], schema))
  }
  # Else, list of arrays
  # making sure there are always names
  if (is.null(names(arrays))) {
    names(arrays) <- rep_len("", length(arrays))
  }
  stopifnot(length(arrays) > 0)
  # TODO: should this also assert that they're all Arrays?
  shared_ptr(RecordBatch, RecordBatch__from_arrays(schema, arrays))
}
RecordBatch$from_message <- function(obj, schema) {
  # Message/Buffer readers, previously in read_record_batch()
  assert_is(schema, "Schema")
  if (inherits(obj, c("raw", "Buffer"))) {
    obj <- BufferReader$create(obj)
    on.exit(obj$close())
  }
  if (inherits(obj, "InputStream")) {
    shared_ptr(RecordBatch, ipc___ReadRecordBatch__InputStream__Schema(obj, schema))
  } else {
    shared_ptr(RecordBatch, ipc___ReadRecordBatch__Message__Schema(obj, schema))
  }
}
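
# A rough sketch of how `$serialize()` and `from_message` pair up for IPC
# round-trips (illustrative only, not run; `batch` is any RecordBatch):
#
#   bytes <- batch$serialize()                          # raw vector
#   batch2 <- record_batch(bytes, schema = batch$schema)
#   batch2$Equals(batch)                                # TRUE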
#' @param ... A `data.frame` or a named set of Arrays or vectors. If given a
#' mixture of data.frames and vectors, the inputs will be autospliced together
#' (see examples). Alternatively, you can provide a single Arrow IPC
#' `InputStream`, `Message`, `Buffer`, or R `raw` object containing a `Buffer`.
#' @param schema a [Schema], or `NULL` (the default) to infer the schema from
#' the data in `...`. When providing an Arrow IPC buffer, `schema` is required.
#' @rdname RecordBatch
#' @examples
#' \donttest{
#' batch <- record_batch(name = rownames(mtcars), mtcars)
#' dim(batch)
#' dim(head(batch))
#' names(batch)
#' batch$mpg
#' batch[["cyl"]]
#' as.data.frame(batch[4:8, c("gear", "hp", "wt")])
#' }
#' @export
record_batch <- RecordBatch$create
#' @export
names.RecordBatch <- function(x) x$names()
#' @importFrom methods as
#' @export
`[.RecordBatch` <- function(x, i, j, ..., drop = FALSE) {
  if (nargs() == 2L) {
    # List-like column extraction (x[i])
    return(x[, i])
  }
  if (!missing(j)) {
    # Selecting columns is cheaper than filtering rows, so do it first.
    # That way, if we're filtering too, we have fewer arrays to filter/slice/take
    if (is_integerish(j)) {
      if (all(j < 0)) {
        # in R, negative j means "everything but j"
        j <- setdiff(seq_len(x$num_columns), -1 * j)
      }
      x <- x$SelectColumns(as.integer(j) - 1L)
    } else if (is.character(j)) {
      x <- x$SelectColumns(match(j, names(x)) - 1L)
    }
    if (drop && ncol(x) == 1L) {
      x <- x$column(0)
    }
  }
  if (!missing(i)) {
    x <- filter_rows(x, i, ...)
  }
  x
}
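
# A quick illustration of the indexing semantics above (not run; `batch` as in
# the examples, built from mtcars):
#
#   batch[, c("mpg", "cyl")]        # select columns by name
#   batch[, -1]                     # everything but the first column
#   batch[1:5, ]                    # the first five rows
#   batch[, "mpg", drop = TRUE]     # a single Array rather than a RecordBatch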
#' @export
`[[.RecordBatch` <- function(x, i, ...) {
  if (is.character(i)) {
    x$GetColumnByName(i)
  } else if (is.numeric(i)) {
    x$column(i - 1)
  } else {
    stop("'i' must be character or numeric, not ", class(i), call. = FALSE)
  }
}
#' @export
`$.RecordBatch` <- function(x, name, ...) {
  assert_that(is.string(name))
  if (name %in% ls(x)) {
    get(name, x)
  } else {
    x$GetColumnByName(name)
  }
}
#' @export
dim.RecordBatch <- function(x) {
  c(x$num_rows, x$num_columns)
}
#' @export
as.data.frame.RecordBatch <- function(x, row.names = NULL, optional = FALSE, ...) {
  df <- RecordBatch__to_dataframe(x, use_threads = option_use_threads())
  if (!is.null(r_metadata <- x$metadata$r)) {
    df <- apply_arrow_r_metadata(df, .unserialize_arrow_r_metadata(r_metadata))
  }
  df
}
.serialize_arrow_r_metadata <- function(x) {
  rawToChar(serialize(x, NULL, ascii = TRUE))
}
.unserialize_arrow_r_metadata <- function(x) {
  tryCatch(unserialize(charToRaw(x)), error = function(e) {
    warning("Invalid metadata$r", call. = FALSE)
    NULL
  })
}
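
# Sketch of the round trip these helpers perform (illustrative only):
#
#   meta <- .serialize_arrow_r_metadata(list(attributes = list(foo = "bar")))
#   # meta is an ASCII string that can be stored as Arrow key-value metadata
#   .unserialize_arrow_r_metadata(meta)$attributes$foo   # "bar"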
apply_arrow_r_metadata <- function(x, r_metadata) {
  tryCatch({
    if (!is.null(r_metadata$attributes)) {
      attributes(x)[names(r_metadata$attributes)] <- r_metadata$attributes
      if (inherits(x, "POSIXlt")) {
        # We store POSIXlt as a StructArray, which is translated back to R
        # as a data.frame, but while data frames have a row.names = c(NA, nrow(x))
        # attribute, POSIXlt does not, so since this is now no longer an object
        # of class data.frame, remove the extraneous attribute
        attr(x, "row.names") <- NULL
      }
    }
    columns_metadata <- r_metadata$columns
    if (length(names(x)) && !is.null(columns_metadata)) {
      for (name in intersect(names(columns_metadata), names(x))) {
        x[[name]] <- apply_arrow_r_metadata(x[[name]], columns_metadata[[name]])
      }
    }
  }, error = function(e) {
    warning("Invalid metadata$r", call. = FALSE)
  })
  x
}
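
# For illustration, restoring a stored class attribute onto a plain vector
# (the metadata shape mirrors what .serialize_arrow_r_metadata records):
#
#   apply_arrow_r_metadata(1:3, list(attributes = list(class = "myclass")))
#   # an integer vector with class "myclass"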
#' @export
as.list.RecordBatch <- function(x, ...) as.list(as.data.frame(x, ...))
#' @export
row.names.RecordBatch <- function(x) as.character(seq_len(nrow(x)))
#' @export
dimnames.RecordBatch <- function(x) list(row.names(x), names(x))
#' @export
head.RecordBatch <- head.Array
#' @export
tail.RecordBatch <- tail.Array
ToString_tabular <- function(x, ...) {
  # Generic to work with both RecordBatch and Table
  sch <- unlist(strsplit(x$schema$ToString(), "\n"))
  sch <- sub("(.*): (.*)", "$\\1 <\\2>", sch)
  dims <- sprintf("%s rows x %s columns", nrow(x), ncol(x))
  paste(c(dims, sch), collapse = "\n")
}
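
# For a batch built from mtcars, the output looks roughly like (illustrative,
# assuming default schema inference):
#
#   32 rows x 11 columns
#   $mpg <double>
#   $cyl <double>
#   ...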