% blob: aa11c222bc55ce5e2d2c6e0d315b33512f4cf5ca [file] [log] [blame]
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/type.R
\name{data-type}
\alias{data-type}
\alias{int8}
\alias{int16}
\alias{int32}
\alias{int64}
\alias{uint8}
\alias{uint16}
\alias{uint32}
\alias{uint64}
\alias{float16}
\alias{halffloat}
\alias{float32}
\alias{float}
\alias{float64}
\alias{boolean}
\alias{bool}
\alias{utf8}
\alias{large_utf8}
\alias{binary}
\alias{large_binary}
\alias{fixed_size_binary}
\alias{string}
\alias{date32}
\alias{date64}
\alias{time32}
\alias{time64}
\alias{duration}
\alias{null}
\alias{timestamp}
\alias{decimal}
\alias{decimal32}
\alias{decimal64}
\alias{decimal128}
\alias{decimal256}
\alias{struct}
\alias{list_of}
\alias{large_list_of}
\alias{FixedSizeListType}
\alias{fixed_size_list_of}
\alias{MapType}
\alias{map_of}
\title{Create Arrow data types}
\usage{
int8()
int16()
int32()
int64()
uint8()
uint16()
uint32()
uint64()
float16()
halffloat()
float32()
float()
float64()
boolean()
bool()
utf8()
large_utf8()
binary()
large_binary()
fixed_size_binary(byte_width)
string()
date32()
date64()
time32(unit = c("ms", "s"))
time64(unit = c("ns", "us"))
duration(unit = c("s", "ms", "us", "ns"))
null()
timestamp(unit = c("s", "ms", "us", "ns"), timezone = "")
decimal(precision, scale)
decimal32(precision, scale)
decimal64(precision, scale)
decimal128(precision, scale)
decimal256(precision, scale)
struct(...)
list_of(type)
large_list_of(type)
fixed_size_list_of(type, list_size)
map_of(key_type, item_type, .keys_sorted = FALSE)
}
\arguments{
\item{byte_width}{byte width for \code{FixedSizeBinary} type.}
\item{unit}{For time, timestamp, and duration types, the time unit. \code{time32()}
can take either "s" or "ms", while \code{time64()} can be "us" or "ns".
\code{timestamp()} and \code{duration()} can take any of those four values.}
\item{timezone}{For \code{timestamp()}, an optional time zone string.}
\item{precision}{For \code{decimal()}, \code{decimal32()}, \code{decimal64()},
\code{decimal128()}, and \code{decimal256()} the number of significant digits the
arrow \code{decimal} type can represent. The maximum precision is 9 significant
digits for \code{decimal32()}, 18 for \code{decimal64()}, 38 for \code{decimal128()},
and 76 for \code{decimal256()}. \code{decimal()} will use it to choose which
type of decimal to return.}
\item{scale}{For \code{decimal()}, \code{decimal32()}, \code{decimal64()},
\code{decimal128()}, and \code{decimal256()} the number of digits after the
decimal point. It can be negative.}
\item{...}{For \code{struct()}, a named list of types to define the struct columns}
\item{type}{For \code{list_of()}, \code{large_list_of()}, and \code{fixed_size_list_of()}, the data type of the list elements.}
\item{list_size}{list size for \code{FixedSizeList} type.}
\item{key_type, item_type}{For \code{MapType}, the key and item types.}
\item{.keys_sorted}{Use \code{TRUE} to assert that keys of a \code{MapType} are
sorted.}
}
\value{
An Arrow type object inheriting from \link{DataType}.
}
\description{
These functions create type objects corresponding to Arrow types. Use them
when defining a \code{\link[=schema]{schema()}} or as inputs to other types, like \code{struct}. Most
of these functions don't take arguments, but a few do.
}
\details{
A few functions have aliases:
\itemize{
\item \code{utf8()} and \code{string()}
\item \code{float16()} and \code{halffloat()}
\item \code{float32()} and \code{float()}
\item \code{bool()} and \code{boolean()}
\item When called inside an \code{arrow} function, such as \code{schema()} or \code{cast()},
\code{double()} also is supported as a way of creating a \code{float64()}
}
\code{date32()} creates a datetime type with a "day" unit, like the R \code{Date}
class. \code{date64()} has a "ms" unit.
\code{uint32} (32-bit unsigned integer), \code{uint64} (64-bit unsigned integer), and
\code{int64} (64-bit signed integer) types may contain values that exceed the
range of R's \code{integer} type (32-bit signed integer). When these arrow objects
are translated to R objects and all of their values fit in that range, they are
converted to \code{integer}. When they do not, \code{uint32} and \code{uint64} are
converted to \code{double} ("numeric") and \code{int64} is converted to
\code{bit64::integer64}. For \code{int64} types, the downcast to \code{integer} can be
disabled (so that \code{int64} always yields a \code{bit64::integer64} object) by
setting \code{options(arrow.int64_downcast = FALSE)}.
\code{decimal128()} creates a \code{Decimal128Type}. Arrow decimals are fixed-point
decimal numbers encoded as a scalar integer. The \code{precision} is the number of
significant digits that the decimal type can represent; the \code{scale} is the
number of digits after the decimal point. For example, the number 1234.567
has a precision of 7 and a scale of 3. Note that \code{scale} can be negative.
As an example, \code{decimal128(7, 3)} can exactly represent the numbers 1234.567 and
-1234.567 (encoded internally as the 128-bit integers 1234567 and -1234567,
respectively), but neither 12345.67 nor 123.4567.
\code{decimal128(5, -3)} can exactly represent the number 12345000 (encoded
internally as the 128-bit integer 12345), but neither 123450000 nor 1234500.
The \code{scale} can be thought of as an argument that controls rounding. When
negative, \code{scale} causes the number to be expressed using scientific notation
and a power of 10.
\code{decimal256()} creates a \code{Decimal256Type}, which allows for higher maximum
precision. For most use cases, the maximum precision offered by \code{Decimal128Type}
is sufficient, and it will result in a more compact and more efficient encoding.
\code{decimal()} creates either a \code{Decimal128Type} or a \code{Decimal256Type}
depending on the value for \code{precision}. If \code{precision} is greater than 38 a
\code{Decimal256Type} is returned, otherwise a \code{Decimal128Type}.
Use \code{decimal128()} or \code{decimal256()} as the names are more informative than
\code{decimal()}.
}
\examples{
\dontshow{if (arrow_with_acero()) withAutoprint(\{ # examplesIf}
bool()
struct(a = int32(), b = double())
timestamp("ms", timezone = "CEST")
time64("ns")
# Use the cast method to change the type of data contained in Arrow objects.
# Please check the documentation of each data object class for details.
my_scalar <- Scalar$create(0L, type = int64()) # int64
my_scalar$cast(timestamp("ns")) # timestamp[ns]
my_array <- Array$create(0L, type = int64()) # int64
my_array$cast(timestamp("s", timezone = "UTC")) # timestamp[s, tz=UTC]
my_chunked_array <- chunked_array(0L, 1L) # int32
my_chunked_array$cast(date32()) # date32[day]
# You can also use `cast()` in an Arrow dplyr query.
if (requireNamespace("dplyr", quietly = TRUE)) {
library(dplyr, warn.conflicts = FALSE)
arrow_table(mtcars) |>
transmute(
col1 = cast(cyl, string()),
col2 = cast(cyl, int8())
) |>
compute()
}
\dontshow{\}) # examplesIf}
}
\seealso{
\code{\link[=dictionary]{dictionary()}} for creating a dictionary (factor-like) type.
}