| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| |
| # The following S3 methods are registered on load if dplyr is present |
| |
| tbl_vars.arrow_dplyr_query <- function(x) names(x$selected_columns) |
| |
| select.arrow_dplyr_query <- function(.data, ...) { |
| check_select_helpers(enexprs(...)) |
| column_select(arrow_dplyr_query(.data), !!!enquos(...)) |
| } |
| select.Dataset <- select.ArrowTabular <- select.arrow_dplyr_query |
| |
| rename.arrow_dplyr_query <- function(.data, ...) { |
| check_select_helpers(enexprs(...)) |
| column_select(arrow_dplyr_query(.data), !!!enquos(...), .FUN = vars_rename) |
| } |
| rename.Dataset <- rename.ArrowTabular <- rename.arrow_dplyr_query |
| |
| column_select <- function(.data, ..., .FUN = vars_select) { |
| # .FUN is either tidyselect::vars_select or tidyselect::vars_rename |
| # It operates on the names() of selected_columns, i.e. the column names |
| # factoring in any renaming that may already have happened |
| out <- .FUN(names(.data), !!!enquos(...)) |
| # Make sure that the resulting selected columns map back to the original data, |
| # as in when there are multiple renaming steps |
| .data$selected_columns <- set_names(.data$selected_columns[out], names(out)) |
| |
| # If we've renamed columns, we need to project that renaming into other |
| # query parameters we've collected |
| renamed <- out[names(out) != out] |
| if (length(renamed)) { |
| # Massage group_by |
| gbv <- .data$group_by_vars |
| renamed_groups <- gbv %in% renamed |
| gbv[renamed_groups] <- names(renamed)[match(gbv[renamed_groups], renamed)] |
| .data$group_by_vars <- gbv |
| # No need to massage filters because those contain references to Arrow objects |
| } |
| .data |
| } |
| |
| relocate.arrow_dplyr_query <- function(.data, ..., .before = NULL, .after = NULL) { |
| # The code in this function is adapted from the code in dplyr::relocate.data.frame |
| # at https://github.com/tidyverse/dplyr/blob/master/R/relocate.R |
| # TODO: revisit this after https://github.com/tidyverse/dplyr/issues/5829 |
| |
| .data <- arrow_dplyr_query(.data) |
| |
| # Assign the schema to the expressions |
| map(.data$selected_columns, ~(.$schema <- .data$.data$schema)) |
| |
| # Create a mask for evaluating expressions in tidyselect helpers |
| mask <- new_environment(.cache$functions, parent = caller_env()) |
| |
| to_move <- eval_select(substitute(c(...)), .data$selected_columns, mask) |
| |
| .before <- enquo(.before) |
| .after <- enquo(.after) |
| has_before <- !quo_is_null(.before) |
| has_after <- !quo_is_null(.after) |
| |
| if (has_before && has_after) { |
| abort("Must supply only one of `.before` and `.after`.") |
| } else if (has_before) { |
| where <- min(unname(eval_select(quo_get_expr(.before), .data$selected_columns, mask))) |
| if (!where %in% to_move) { |
| to_move <- c(to_move, where) |
| } |
| } else if (has_after) { |
| where <- max(unname(eval_select(quo_get_expr(.after), .data$selected_columns, mask))) |
| if (!where %in% to_move) { |
| to_move <- c(where, to_move) |
| } |
| } else { |
| where <- 1L |
| if (!where %in% to_move) { |
| to_move <- c(to_move, where) |
| } |
| } |
| |
| lhs <- setdiff(seq2(1, where - 1), to_move) |
| rhs <- setdiff(seq2(where + 1, length(.data$selected_columns)), to_move) |
| |
| pos <- vec_unique(c(lhs, to_move, rhs)) |
| new_names <- names(pos) |
| .data$selected_columns <- .data$selected_columns[pos] |
| |
| if (!is.null(new_names)) { |
| names(.data$selected_columns)[new_names != ""] <- new_names[new_names != ""] |
| } |
| .data |
| } |
| relocate.Dataset <- relocate.ArrowTabular <- relocate.arrow_dplyr_query |
| |
| check_select_helpers <- function(exprs) { |
| # Throw an error if unsupported tidyselect selection helpers in `exprs` |
| exprs <- lapply(exprs, function(x) if (is_quosure(x)) quo_get_expr(x) else x) |
| unsup_select_helpers <- "where" |
| funs_in_exprs <- unlist(lapply(exprs, all_funs)) |
| unsup_funs <- funs_in_exprs[funs_in_exprs %in% unsup_select_helpers] |
| if (length(unsup_funs)) { |
| stop( |
| "Unsupported selection ", |
| ngettext(length(unsup_funs), "helper: ", "helpers: "), |
| oxford_paste(paste0(unsup_funs, "()"), quote = FALSE), |
| call. = FALSE |
| ) |
| } |
| } |