| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| context("Schema metadata and R attributes") |
| |
| test_that("Schema metadata", { |
| s <- schema(b = double()) |
| expect_equivalent(s$metadata, list()) |
| expect_false(s$HasMetadata) |
| s$metadata <- list(test = TRUE) |
| expect_identical(s$metadata, list(test = "TRUE")) |
| expect_true(s$HasMetadata) |
| s$metadata$foo <- 42 |
| expect_identical(s$metadata, list(test = "TRUE", foo = "42")) |
| expect_true(s$HasMetadata) |
| s$metadata$foo <- NULL |
| expect_identical(s$metadata, list(test = "TRUE")) |
| expect_true(s$HasMetadata) |
| s$metadata <- NULL |
| expect_equivalent(s$metadata, list()) |
| expect_false(s$HasMetadata) |
| expect_error( |
| s$metadata <- 4, |
| "Key-value metadata must be a named list or character vector" |
| ) |
| }) |
| |
| test_that("Table metadata", { |
| tab <- Table$create(x = 1:2, y = c("a", "b")) |
| expect_equivalent(tab$metadata, list()) |
| tab$metadata <- list(test = TRUE) |
| expect_identical(tab$metadata, list(test = "TRUE")) |
| tab$metadata$foo <- 42 |
| expect_identical(tab$metadata, list(test = "TRUE", foo = "42")) |
| tab$metadata$foo <- NULL |
| expect_identical(tab$metadata, list(test = "TRUE")) |
| tab$metadata <- NULL |
| expect_equivalent(tab$metadata, list()) |
| }) |
| |
| test_that("Table R metadata", { |
| tab <- Table$create(example_with_metadata) |
| expect_output(print(tab$metadata), "arrow_r_metadata") |
| expect_identical(as.data.frame(tab), example_with_metadata) |
| }) |
| |
| test_that("R metadata is not stored for types that map to Arrow types (factor, Date, etc.)", { |
| tab <- Table$create(example_data[1:6]) |
| expect_null(tab$metadata$r) |
| |
| expect_null(Table$create(example_with_times[1:3])$metadata$r) |
| }) |
| |
| test_that("Garbage R metadata doesn't break things", { |
| tab <- Table$create(example_data[1:6]) |
| tab$metadata$r <- "garbage" |
| expect_warning( |
| expect_identical(as.data.frame(tab), example_data[1:6]), |
| "Invalid metadata$r", |
| fixed = TRUE |
| ) |
| # serialize data like .serialize_arrow_r_metadata does, but don't call that |
| # directly since it checks to ensure that the data is a list |
| tab$metadata$r <- rawToChar(serialize("garbage", NULL, ascii = TRUE)) |
| expect_warning( |
| expect_identical(as.data.frame(tab), example_data[1:6]), |
| "Invalid metadata$r", |
| fixed = TRUE |
| ) |
| }) |
| |
| test_that("Metadata serialization compression", { |
| # attributes that (when serialized) are just under 100kb are not compressed, |
| # and simply serialized |
| strings <- as.list(rep(make_string_of_size(1), 98)) |
| small <- .serialize_arrow_r_metadata(strings) |
| expect_equal( |
| object.size(small), |
| object.size(rawToChar(serialize(strings, NULL, ascii = TRUE))) |
| ) |
| |
| # Large strings will be compressed |
| large_strings <- as.list(rep(make_string_of_size(1), 100)) |
| large <- .serialize_arrow_r_metadata(large_strings) |
| expect_lt( |
| object.size(large), |
| object.size(rawToChar(serialize(large_strings, NULL, ascii = TRUE))) |
| ) |
| # and this compression ends up being smaller than even the "small" strings |
| expect_lt(object.size(large), object.size(small)) |
| |
| # However strings where compression + serialization is not effective are no |
| # worse than only serialization alone |
| large_few_strings <- as.list(rep(make_random_string_of_size(50), 2)) |
| large_few <- .serialize_arrow_r_metadata(large_few_strings) |
| expect_equal( |
| object.size(large_few), |
| object.size(rawToChar(serialize(large_few_strings, NULL, ascii = TRUE))) |
| ) |
| |
| # But we can disable compression |
| op <- options(arrow.compress_metadata = FALSE); on.exit(options(op)) |
| |
| large_strings <- as.list(rep(make_string_of_size(1), 100)) |
| large <- .serialize_arrow_r_metadata(large_strings) |
| expect_equal( |
| object.size(large), |
| object.size(rawToChar(serialize(large_strings, NULL, ascii = TRUE))) |
| ) |
| }) |
| |
| test_that("RecordBatch metadata", { |
| rb <- RecordBatch$create(x = 1:2, y = c("a", "b")) |
| expect_equivalent(rb$metadata, list()) |
| rb$metadata <- list(test = TRUE) |
| expect_identical(rb$metadata, list(test = "TRUE")) |
| rb$metadata$foo <- 42 |
| expect_identical(rb$metadata, list(test = "TRUE", foo = "42")) |
| rb$metadata$foo <- NULL |
| expect_identical(rb$metadata, list(test = "TRUE")) |
| rb$metadata <- NULL |
| expect_equivalent(rb$metadata, list()) |
| }) |
| |
| test_that("RecordBatch R metadata", { |
| expect_identical(as.data.frame(record_batch(example_with_metadata)), example_with_metadata) |
| }) |
| |
| test_that("R metadata roundtrip via parquet", { |
| skip_if_not_available("parquet") |
| tf <- tempfile() |
| on.exit(unlink(tf)) |
| |
| write_parquet(example_with_metadata, tf) |
| expect_identical(read_parquet(tf), example_with_metadata) |
| }) |
| |
| test_that("R metadata roundtrip via feather", { |
| tf <- tempfile() |
| on.exit(unlink(tf)) |
| |
| write_feather(example_with_metadata, tf) |
| expect_identical(read_feather(tf), example_with_metadata) |
| }) |
| |
| test_that("haven types roundtrip via feather", { |
| tf <- tempfile() |
| on.exit(unlink(tf)) |
| |
| write_feather(haven_data, tf) |
| expect_identical(read_feather(tf), haven_data) |
| }) |
| |
| test_that("Date/time type roundtrip", { |
| rb <- record_batch(example_with_times) |
| expect_r6_class(rb$schema$posixlt$type, "StructType") |
| expect_identical(as.data.frame(rb), example_with_times) |
| }) |
| |
| test_that("metadata keeps attribute of top level data frame", { |
| df <- structure(data.frame(x = 1, y = 2), foo = "bar") |
| tab <- Table$create(df) |
| expect_identical(attr(as.data.frame(tab), "foo"), "bar") |
| expect_identical(as.data.frame(tab), df) |
| }) |
| |
| |
| test_that("metadata drops readr's problems attribute", { |
| readr_like <- tibble::tibble( |
| dbl = 1.1, |
| not_here = NA_character_ |
| ) |
| attributes(readr_like) <- append( |
| attributes(readr_like), |
| list(problems = tibble::tibble( |
| row = 1L, |
| col = NA_character_, |
| expected = "2 columns", |
| actual = "1 columns", |
| file = "'test'" |
| )) |
| ) |
| |
| tab <- Table$create(readr_like) |
| expect_null(attr(as.data.frame(tab), "problems")) |
| }) |
| |
| test_that("metadata of list elements (ARROW-10386)", { |
| df <- data.frame(x = I(list(structure(1, foo = "bar"), structure(2, baz = "qux")))) |
| tab <- Table$create(df) |
| expect_identical(attr(as.data.frame(tab)$x[[1]], "foo"), "bar") |
| expect_identical(attr(as.data.frame(tab)$x[[2]], "baz"), "qux") |
| }) |