# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#' Convert an object to a DataFrame
#'
#' @param x An object to convert
#' @param ... Extra arguments passed to/from methods
#' @param schema The requested schema
#'
#' @returns A sedonadb_dataframe
#' @export
#'
#' @examples
#' as_sedonadb_dataframe(data.frame(x = 1:3))
#'
as_sedonadb_dataframe <- function(x, ..., schema = NULL) {
  # S3 generic: the concrete conversion is chosen from the class of `x`.
  # `...` and `schema` are passed through to the selected method unchanged.
  UseMethod("as_sedonadb_dataframe", x)
}
#' @export
as_sedonadb_dataframe.sedonadb_dataframe <- function(
  x,
  ...,
  schema = NULL
) {
  # Already a sedonadb_dataframe: hand it back untouched. `schema` is
  # currently ignored here; a cast could honour it in the future.
  x
}
#' @export
as_sedonadb_dataframe.data.frame <- function(x, ..., schema = NULL) {
  # Convert the data.frame to a single Arrow array (applying `schema` if
  # given) and expose it as a one-batch array stream.
  batches <- list(nanoarrow::as_nanoarrow_array(x, schema = schema))
  reader <- nanoarrow::basic_array_stream(batches)

  # Import the stream through the context, requesting immediate collection.
  context <- ctx()
  internal_df <- context$data_frame_from_array_stream(
    reader,
    collect_now = TRUE
  )

  new_sedonadb_dataframe(context, internal_df)
}
#' @export
as_sedonadb_dataframe.nanoarrow_array <- function(x, ..., schema = NULL) {
  # Wrap the array in a stream, applying the requested schema (if any).
  reader <- nanoarrow::as_nanoarrow_array_stream(x, schema = schema)

  # Import the stream through the context, requesting immediate collection.
  context <- ctx()
  internal_df <- context$data_frame_from_array_stream(
    reader,
    collect_now = TRUE
  )
  wrapped <- new_sedonadb_dataframe(context, internal_df)

  # Re-dispatch on the wrapped result so that the sedonadb_dataframe method
  # also sees `schema`.
  as_sedonadb_dataframe(wrapped, schema = schema)
}
#' @export
as_sedonadb_dataframe.nanoarrow_array_stream <- function(x, ..., schema = NULL,
                                                         lazy = TRUE) {
  # Apply the requested schema (if any) to the incoming stream.
  reader <- nanoarrow::as_nanoarrow_array_stream(x, schema = schema)

  # With lazy = TRUE (the default) collect_now is FALSE; lazy = FALSE
  # requests immediate collection.
  context <- ctx()
  internal_df <- context$data_frame_from_array_stream(
    reader,
    collect_now = !lazy
  )
  wrapped <- new_sedonadb_dataframe(context, internal_df)

  # Re-dispatch on the wrapped result so that the sedonadb_dataframe method
  # also sees `schema`.
  as_sedonadb_dataframe(wrapped, schema = schema)
}
#' Count rows in a DataFrame
#'
#' @param .data A sedonadb_dataframe or an object that can be coerced to one
#'   with [as_sedonadb_dataframe()].
#'
#' @returns The number of rows after executing the query
#' @export
#'
#' @examples
#' sd_sql("SELECT 1 as one") |> sd_count()
#'
sd_count <- function(.data) {
  # Coerce first so that anything with an as_sedonadb_dataframe() method
  # (e.g., a data.frame) is accepted, matching the other sd_*() verbs.
  .data <- as_sedonadb_dataframe(.data)
  .data$df$count()
}
#' Register a DataFrame as a named view
#'
#' This is useful for creating a view that can be referenced in a SQL
#' statement. Use [sd_drop_view()] to remove it.
#'
#' @inheritParams sd_count
#' @inheritParams sd_drop_view
#' @param overwrite Use TRUE to overwrite a view with the same name (if it exists)
#'
#' @returns .data, invisibly
#' @export
#'
#' @examples
#' sd_sql("SELECT 1 as one") |> sd_to_view("foofy")
#' sd_sql("SELECT * FROM foofy")
#'
sd_to_view <- function(.data, table_ref, overwrite = FALSE) {
  # Accept anything coercible to a sedonadb_dataframe.
  sdf <- as_sedonadb_dataframe(.data)

  # Register the view under `table_ref` in the data frame's own context.
  sdf$df$to_view(sdf$ctx, table_ref, overwrite)

  # Return the (coerced) input invisibly so the call can sit in a pipeline.
  invisible(sdf)
}
#' Collect a DataFrame into memory
#'
#' Use `sd_compute()` to collect and return the result as a DataFrame;
#' use `sd_collect()` to collect and return the result as an R data.frame.
#'
#' @inheritParams sd_count
#' @param ptype The target R object. See [nanoarrow::convert_array_stream].
#'
#' @returns `sd_compute()` returns a sedonadb_dataframe; `sd_collect()` returns
#' a data.frame (or subclass according to `ptype`).
#' @export
#'
#' @examples
#' sd_sql("SELECT 1 as one") |> sd_compute()
#' sd_sql("SELECT 1 as one") |> sd_collect()
#'
sd_compute <- function(.data) {
  # Accept anything coercible to a sedonadb_dataframe.
  sdf <- as_sedonadb_dataframe(.data)

  # Compute against the originating context and wrap the result with that
  # same context.
  computed <- sdf$df$compute(sdf$ctx)
  new_sedonadb_dataframe(sdf$ctx, computed)
}
#' @export
#' @rdname sd_compute
sd_collect <- function(.data, ptype = NULL) {
  # Accept anything coercible to a sedonadb_dataframe.
  sdf <- as_sedonadb_dataframe(.data)

  # collect() exports into the freshly allocated stream and returns a size,
  # which is forwarded to the converter.
  out_stream <- nanoarrow::nanoarrow_allocate_array_stream()
  n_collected <- sdf$df$collect(out_stream)

  # Convert to an R object; `ptype` selects the target type (NULL lets
  # nanoarrow choose).
  nanoarrow::convert_array_stream(out_stream, size = n_collected, to = ptype)
}
#' Preview and print the results of running a query
#'
#' This is used to implement `print()` for the sedonadb_dataframe or can
#' be used to explicitly preview if `options(sedonadb.interactive = FALSE)`.
#'
#' @inheritParams sd_count
#' @param n The number of rows to preview. Use `Inf` to preview all rows.
#' Defaults to `getOption("pillar.print_max", 6)`.
#' @param ascii Use `TRUE` to force ASCII table formatting or `FALSE` to force
#' unicode formatting. By default, use a heuristic to determine if the output
#' is unicode-friendly or the value of `getOption("cli.unicode")`.
#' @param width The character width of the output. Defaults to
#' `getOption("width")`.
#'
#' @returns .data, invisibly
#' @export
#'
#' @examples
#' sd_sql("SELECT 1 as one") |> sd_preview()
#'
sd_preview <- function(.data, n = NULL, ascii = NULL, width = NULL) {
  # Accept anything coercible to a sedonadb_dataframe.
  .data <- as_sedonadb_dataframe(.data)

  # Fill in any unset argument from session options.
  width <- if (is.null(width)) getOption("width") else width
  n <- if (is.null(n)) getOption("pillar.print_max", 6) else n
  # Fall back to ASCII formatting unless the output looks UTF-8 capable.
  ascii <- if (is.null(ascii)) !is_utf8_output() else ascii

  # Render the preview as text using the data frame's own context.
  rendered <- .data$df$show(
    .data$ctx,
    width_chars = as.integer(width),
    limit = as.double(n),
    ascii = ascii
  )
  footer <- paste0("Preview of up to ", n, " row(s)\n")

  cat(rendered)
  cat(footer)

  invisible(.data)
}
new_sedonadb_dataframe <- function(ctx, internal_df) {
  # Low-level constructor: a plain list holding the context and the internal
  # data frame handle, tagged with the S3 class. No validation is performed.
  obj <- list(ctx = ctx, df = internal_df)
  class(obj) <- "sedonadb_dataframe"
  obj
}
#' @importFrom utils head
#' @export
head.sedonadb_dataframe <- function(x, n = 6L, ...) {
  # Apply the limit on the internal data frame (the count is passed as a
  # double) and wrap the result with the same context as the input.
  limited <- x$df$limit(as.double(n))
  new_sedonadb_dataframe(x$ctx, limited)
}
#' @export
dimnames.sedonadb_dataframe <- function(x, ...) {
  # Column names are the names of the schema's children; no row names are
  # reported.
  schema <- infer_nanoarrow_schema(x)
  col_names <- names(schema$children)
  list(NULL, col_names)
}
#' @export
dim.sedonadb_dataframe <- function(x, ...) {
  # The row count is reported as NA; the column count is the number of
  # children in the schema.
  schema <- infer_nanoarrow_schema(x)
  n_cols <- length(schema$children)
  c(NA_integer_, n_cols)
}
#' @export
as.data.frame.sedonadb_dataframe <- function(x, ...) {
  # collect() exports into the freshly allocated stream and returns a size,
  # which is forwarded to the converter.
  out_stream <- nanoarrow::nanoarrow_allocate_array_stream()
  n_collected <- x$df$collect(out_stream)

  # Convert using nanoarrow's default target type.
  nanoarrow::convert_array_stream(out_stream, size = n_collected)
}
#' @importFrom nanoarrow infer_nanoarrow_schema
#' @export
infer_nanoarrow_schema.sedonadb_dataframe <- function(x, ...) {
  # Allocate an empty schema, let the internal data frame export into it,
  # and return the populated object.
  out_schema <- nanoarrow::nanoarrow_allocate_schema()
  x$df$to_arrow_schema(out_schema)
  out_schema
}
#' @importFrom nanoarrow as_nanoarrow_array_stream
#' @export
as_nanoarrow_array_stream.sedonadb_dataframe <- function(x, ...) {
  # Allocate an empty stream, let the internal data frame export into it,
  # and return the populated object.
  out_stream <- nanoarrow::nanoarrow_allocate_array_stream()
  x$df$to_arrow_stream(out_stream)
  out_stream
}
#' @export
print.sedonadb_dataframe <- function(x, ..., width = NULL, n = NULL) {
  # Print method: previews rows when the sedonadb.interactive option is TRUE
  # (the default when unset); otherwise shows a zero-row preview plus a hint.
  # `width` is honoured in both modes; `n` only applies to the interactive
  # preview. Returns `x` invisibly.
  if (isTRUE(getOption("sedonadb.interactive", TRUE))) {
    sd_preview(x, n = n, width = width)
  } else {
    # Forward `width` here as well so a caller-supplied width is not silently
    # dropped in non-interactive mode (NULL still means getOption("width")).
    sd_preview(x, n = 0, width = width)
    cat(
      "Use options(sedonadb.interactive = TRUE) or use sd_preview() to print\n"
    )
  }

  invisible(x)
}
# Borrowed from cli but without detecting LaTeX output.
is_utf8_output <- function() {
  # An explicitly set cli.unicode option always wins; any value other than a
  # single TRUE counts as "not UTF-8".
  cli_unicode <- getOption("cli.unicode", NULL)
  if (is.null(cli_unicode)) {
    # No explicit preference: report whether the current locale is UTF-8.
    return(l10n_info()[["UTF-8"]])
  }

  isTRUE(cli_unicode)
}