blob: dfa83c35665ce876306ca48e45684029a9ea56c8 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# types.R. This file handles the data type mapping between Spark and R
# Primitive data types: names(PRIMITIVE_TYPES) are the Scala/SQL type names and
# each value is the equivalent R type (one or more R class names). The mapping
# is kept in an environment so that a type can be looked up directly by name.
PRIMITIVE_TYPES <- list2env(
  list(
    tinyint = "integer",
    smallint = "integer",
    int = "integer",
    bigint = "numeric",
    float = "numeric",
    double = "numeric",
    decimal = "numeric",
    string = "character",
    binary = "raw",
    boolean = "logical",
    timestamp = c("POSIXct", "POSIXt"),
    date = "Date",
    # The two entries below are not SQL types returned by dtypes(). They are
    # listed here for usage by checkType() in schema.R.
    # TODO: refactor checkType() in schema.R.
    byte = "integer",
    integer = "integer"
  ),
  parent = emptyenv()
)
# Complex data types. None of them maps directly onto an R type, so every
# entry carries NA as its value.
COMPLEX_TYPES <- structure(
  rep(list(NA), 3L),
  names = c("map", "array", "struct")
)
# Every known data type: the primitive mappings followed by the complex types,
# gathered into a single environment.
DATA_TYPES <- list2env(c(as.list(PRIMITIVE_TYPES), COMPLEX_TYPES), parent = emptyenv())
# Short labels for types: names are R type names (plus the complex type names)
# and values are the abbreviated strings used for them.
SHORT_TYPES <- list2env(
  list(
    character = "chr",
    logical = "logi",
    POSIXct = "POSIXct",
    integer = "int",
    numeric = "num",
    raw = "raw",
    Date = "Date",
    map = "map",
    array = "array",
    struct = "struct"
  ),
  parent = emptyenv()
)
# Environment mapping R to Scala: names are R types, values are Scala types.
rToSQLTypes <- list2env(
  list(
    integer = "integer",   # in R, integer is 32bit
    numeric = "double",    # in R, numeric == double which is 64bit
    double = "double",
    character = "string",
    logical = "boolean"
  ),
  parent = emptyenv()
)
# Helper for backend type strings that are not themselves keys of
# PRIMITIVE_TYPES but should still be treated as primitive. When the backend
# returns a column type in the decimal(,) format (e.g., decimal(10, 0)), this
# function converts it to the double type.
# Note: the parentheses in the pattern are a regex capture group, not literal
# parentheses, so any string consisting of "decimal" followed by at least one
# more character matches. Only the first element of `type` is examined.
# @param type A type returned from the JVM backend.
# @return "double" when `type` matches the decimal pattern, otherwise NULL.
specialtypeshandle <- function(type) {
  # On a match the first element holds the full match plus the captured group,
  # i.e. two strings; without a match it is empty.
  captures <- regmatches(type, regexec("^decimal(.+)$", type))[[1]]
  if (length(captures) < 2) {
    return(NULL)
  }
  "double"
}
# Helper function that checks supported types in Arrow. It stops with an error
# when the 'arrow' package is not available or when any field of the schema has
# a data type that the Arrow optimization in R does not support.
# @param schema A structType object; its fields() are inspected.
# @return NULL, invisibly, when every field passes the checks.
checkSchemaInArrow <- function(schema) {
  stopifnot(inherits(schema, "structType"))

  if (!requireNamespace("arrow", quietly = TRUE)) {
    stop("'arrow' package should be installed.")
  }

  # vapply() always yields a character vector, also for a schema without any
  # field. sapply() would return an empty list in that case, which startsWith()
  # below rejects with "non-character object(s)".
  field_strings <- vapply(schema$fields(), function(x) x$dataType.toString(), character(1))

  # Both cases below produce a corrupt value for unknown reason. It needs to be investigated.
  if (any(field_strings == "FloatType")) {
    stop("Arrow optimization in R does not support float type yet.")
  }
  if (any(field_strings == "BinaryType")) {
    stop("Arrow optimization in R does not support binary type yet.")
  }
  if (any(startsWith(field_strings, "ArrayType"))) {
    stop("Arrow optimization in R does not support array type yet.")
  }

  # Arrow optimization in Spark does not yet support both cases below.
  if (any(startsWith(field_strings, "StructType"))) {
    stop("Arrow optimization in R does not support nested struct type yet.")
  }
  if (any(startsWith(field_strings, "MapType"))) {
    stop("Arrow optimization in R does not support map type yet.")
  }

  invisible(NULL)
}