blob: 86bda393e2defb53ffe7024df86bc0ce78950360 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
context("Table")
test_that("read_table handles various input streams (ARROW-3450, ARROW-3505)", {
tbl <- tibble::tibble(
int = 1:10, dbl = as.numeric(1:10),
lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
chr = letters[1:10]
)
tab <- Table$create(!!!tbl)
tf <- tempfile()
on.exit(unlink(tf))
expect_deprecated(
write_arrow(tab, tf),
"write_feather"
)
tab1 <- read_feather(tf, as_data_frame = FALSE)
tab2 <- read_feather(normalizePath(tf), as_data_frame = FALSE)
readable_file <- ReadableFile$create(tf)
expect_deprecated(
tab3 <- read_arrow(readable_file, as_data_frame = FALSE),
"read_feather"
)
readable_file$close()
mmap_file <- mmap_open(tf)
mmap_file$close()
expect_equal(tab, tab1)
expect_equal(tab, tab2)
expect_equal(tab, tab3)
})
test_that("Table cast (ARROW-3741)", {
tab <- Table$create(x = 1:10, y = 1:10)
expect_error(tab$cast(schema(x = int32())))
expect_error(tab$cast(schema(x = int32(), z = int32())))
s2 <- schema(x = int16(), y = int64())
tab2 <- tab$cast(s2)
expect_equal(tab2$schema, s2)
expect_equal(tab2$column(0L)$type, int16())
expect_equal(tab2$column(1L)$type, int64())
})
test_that("Table S3 methods", {
tab <- Table$create(example_data)
for (f in c("dim", "nrow", "ncol", "dimnames", "colnames", "row.names", "as.list")) {
fun <- get(f)
expect_identical(fun(tab), fun(example_data), info = f)
}
})
test_that("Table $column and $field", {
tab <- Table$create(x = 1:10, y = 1:10)
expect_equal(tab$field(0), field("x", int32()))
# input validation
expect_error(tab$column(NA), "'i' cannot be NA")
expect_error(tab$column(-1), "subscript out of bounds")
expect_error(tab$column(1000), "subscript out of bounds")
expect_error(tab$column(1:2))
expect_error(tab$column("one"))
expect_error(tab$field(NA), "'i' cannot be NA")
expect_error(tab$field(-1), "subscript out of bounds")
expect_error(tab$field(1000), "subscript out of bounds")
expect_error(tab$field(1:2))
expect_error(tab$field("one"))
})
test_that("[, [[, $ for Table", {
tbl <- tibble::tibble(
int = 1:10,
dbl = as.numeric(1:10),
lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
chr = letters[1:10],
fct = factor(letters[1:10])
)
tab <- Table$create(tbl)
expect_identical(names(tab), names(tbl))
expect_data_frame(tab[6:7,], tbl[6:7,])
expect_data_frame(tab[6:7, 2:4], tbl[6:7, 2:4])
expect_data_frame(tab[, c("dbl", "fct")], tbl[, c(2, 5)])
expect_vector(tab[, "chr", drop = TRUE], tbl$chr)
# Take within a single chunk
expect_data_frame(tab[c(7, 3, 5), 2:4], tbl[c(7, 3, 5), 2:4])
expect_data_frame(tab[rep(c(FALSE, TRUE), 5),], tbl[c(2, 4, 6, 8, 10),])
# bool ChunkedArray (with one chunk)
expect_data_frame(tab[tab$lgl,], tbl[tbl$lgl,])
# ChunkedArray with multiple chunks
c1 <- c(TRUE, FALSE, TRUE, TRUE, FALSE)
c2 <- c(FALSE, FALSE, TRUE, TRUE, FALSE)
ca <- ChunkedArray$create(c1, c2)
expect_data_frame(tab[ca,], tbl[c(1, 3, 4, 8, 9),])
# int Array
expect_data_frame(tab[Array$create(5:6), 2:4], tbl[6:7, 2:4])
# ChunkedArray
expect_data_frame(tab[ChunkedArray$create(5L, 6L), 2:4], tbl[6:7, 2:4])
# Expression
expect_data_frame(tab[tab$int > 6,], tbl[tbl$int > 6,])
expect_vector(tab[["int"]], tbl$int)
expect_vector(tab$int, tbl$int)
expect_vector(tab[[4]], tbl$chr)
expect_null(tab$qwerty)
expect_null(tab[["asdf"]])
# List-like column slicing
expect_data_frame(tab[2:4], tbl[2:4])
expect_data_frame(tab[c(2, 1)], tbl[c(2, 1)])
expect_data_frame(tab[-3], tbl[-3])
expect_error(tab[[c(4, 3)]])
expect_error(tab[[NA]], "'i' must be character or numeric, not logical")
expect_error(tab[[NULL]], "'i' must be character or numeric, not NULL")
expect_error(tab[[c("asdf", "jkl;")]], 'length(name) not equal to 1', fixed = TRUE)
expect_error(tab[-3:3], "Invalid column index")
expect_error(tab[1000], "Invalid column index")
expect_error(tab[1:1000], "Invalid column index")
# input validation
expect_error(tab[, c("dbl", "NOTACOLUMN")], 'Column not found: "NOTACOLUMN"')
expect_error(tab[, c(6, NA)], 'Column indices cannot be NA')
skip("Table with 0 cols doesn't know how many rows it should have")
expect_data_frame(tab[0], tbl[0])
})
test_that("[[<- assignment", {
tbl <- tibble::tibble(
int = 1:10,
dbl = as.numeric(1:10),
lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
chr = letters[1:10],
fct = factor(letters[1:10])
)
tab <- Table$create(tbl)
# can remove a column
tab[["chr"]] <- NULL
expect_data_frame(tab, tbl[-4])
# can remove a column by index
tab[[4]] <- NULL
expect_data_frame(tab, tbl[1:3])
# can add a named column
tab[["new"]] <- letters[10:1]
expect_data_frame(tab, dplyr::bind_cols(tbl[1:3], new = letters[10:1]))
# can replace a column by index
tab[[2]] <- as.numeric(10:1)
expect_vector(tab[[2]], as.numeric(10:1))
# can add a column by index
tab[[5]] <- as.numeric(10:1)
expect_vector(tab[[5]], as.numeric(10:1))
expect_vector(tab[["5"]], as.numeric(10:1))
# can replace a column
tab[["int"]] <- 10:1
expect_vector(tab[["int"]], 10:1)
# can use $
tab$new <- NULL
expect_null(as.vector(tab$new))
expect_identical(dim(tab), c(10L, 4L))
tab$int <- 1:10
expect_vector(tab$int, 1:10)
# recycling
tab[["atom"]] <- 1L
expect_vector(tab[["atom"]], rep(1L, 10))
expect_error(
tab[["atom"]] <- 1:6,
"Can't recycle input of size 6 to size 10."
)
# assign Arrow array and chunked_array
array <- Array$create(c(10:1))
tab$array <- array
expect_vector(tab$array, 10:1)
tab$chunked <- chunked_array(1:10)
expect_vector(tab$chunked, 1:10)
# nonsense indexes
expect_error(tab[[NA]] <- letters[10:1], "'i' must be character or numeric, not logical")
expect_error(tab[[NULL]] <- letters[10:1], "'i' must be character or numeric, not NULL")
expect_error(tab[[NA_integer_]] <- letters[10:1], "!is.na(i) is not TRUE", fixed = TRUE)
expect_error(tab[[NA_real_]] <- letters[10:1], "!is.na(i) is not TRUE", fixed = TRUE)
expect_error(tab[[NA_character_]] <- letters[10:1], "!is.na(i) is not TRUE", fixed = TRUE)
expect_error(tab[[c(1, 4)]] <- letters[10:1], "length(i) not equal to 1", fixed = TRUE)
})
test_that("Table$Slice", {
tbl <- tibble::tibble(
int = 1:10,
dbl = as.numeric(1:10),
lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
chr = letters[1:10],
fct = factor(letters[1:10])
)
tab <- Table$create(tbl)
tab2 <- tab$Slice(5)
expect_data_frame(tab2, tbl[6:10,])
tab3 <- tab$Slice(5, 2)
expect_data_frame(tab3, tbl[6:7,])
# Input validation
expect_error(tab$Slice("ten"))
expect_error(tab$Slice(NA_integer_), "Slice 'offset' cannot be NA")
expect_error(tab$Slice(NA), "Slice 'offset' cannot be NA")
expect_error(tab$Slice(10, "ten"))
expect_error(tab$Slice(10, NA_integer_), "Slice 'length' cannot be NA")
expect_error(tab$Slice(NA_integer_, NA_integer_), "Slice 'offset' cannot be NA")
expect_error(tab$Slice(c(10, 10)))
expect_error(tab$Slice(10, c(10, 10)))
expect_error(tab$Slice(1000), "Slice 'offset' greater than array length")
expect_error(tab$Slice(-1), "Slice 'offset' cannot be negative")
expect_error(tab3$Slice(10, 10), "Slice 'offset' greater than array length")
expect_error(tab$Slice(10, -1), "Slice 'length' cannot be negative")
expect_error(tab$Slice(-1, 10), "Slice 'offset' cannot be negative")
})
test_that("head and tail on Table", {
tbl <- tibble::tibble(
int = 1:10,
dbl = as.numeric(1:10),
lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
chr = letters[1:10],
fct = factor(letters[1:10])
)
tab <- Table$create(tbl)
expect_data_frame(head(tab), head(tbl))
expect_data_frame(head(tab, 4), head(tbl, 4))
expect_data_frame(head(tab, 40), head(tbl, 40))
expect_data_frame(head(tab, -4), head(tbl, -4))
expect_data_frame(head(tab, -40), head(tbl, -40))
expect_data_frame(tail(tab), tail(tbl))
expect_data_frame(tail(tab, 4), tail(tbl, 4))
expect_data_frame(tail(tab, 40), tail(tbl, 40))
expect_data_frame(tail(tab, -4), tail(tbl, -4))
expect_data_frame(tail(tab, -40), tail(tbl, -40))
})
test_that("Table print method", {
expect_output(
print(tab),
paste(
"Table",
"10 rows x 5 columns",
"$int <int32>",
"$dbl <double>",
"$lgl <bool>",
"$chr <string>",
"$fct <dictionary<values=string, indices=int8>>",
sep = "\n"
),
fixed = TRUE
)
})
test_that("table active bindings", {
tbl <- tibble::tibble(
int = 1:10,
dbl = as.numeric(1:10),
lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
chr = letters[1:10],
fct = factor(letters[1:10])
)
tab <- Table$create(tbl)
expect_identical(dim(tbl), dim(tab))
expect_type(tab$columns, "list")
expect_equal(tab$columns[[1]], tab[[1]])
})
test_that("table() handles record batches with splicing", {
batch <- record_batch(x = 1:2, y = letters[1:2])
tab <- Table$create(batch, batch, batch)
expect_equal(tab$schema, batch$schema)
expect_equal(tab$num_rows, 6L)
expect_equivalent(
as.data.frame(tab),
vctrs::vec_rbind(as.data.frame(batch), as.data.frame(batch), as.data.frame(batch))
)
batches <- list(batch, batch, batch)
tab <- Table$create(!!!batches)
expect_equal(tab$schema, batch$schema)
expect_equal(tab$num_rows, 6L)
expect_equivalent(
as.data.frame(tab),
vctrs::vec_rbind(!!!purrr::map(batches, as.data.frame))
)
})
test_that("table() handles ... of arrays, chunked arrays, vectors", {
a <- Array$create(1:10)
ca <- chunked_array(1:5, 6:10)
v <- rnorm(10)
tbl <- tibble::tibble(x = 1:10, y = letters[1:10])
tab <- Table$create(a = a, b = ca, c = v, !!!tbl)
expect_equal(
tab$schema,
schema(a = int32(), b = int32(), c = float64(), x = int32(), y = utf8())
)
res <- as.data.frame(tab)
expect_equal(names(res), c("a", "b", "c", "x", "y"))
expect_equal(res,
tibble::tibble(a = 1:10, b = 1:10, c = v, x = 1:10, y = letters[1:10])
)
})
test_that("table() auto splices (ARROW-5718)", {
df <- tibble::tibble(x = 1:10, y = letters[1:10])
tab1 <- Table$create(df)
tab2 <- Table$create(!!!df)
expect_equal(tab1, tab2)
expect_equal(tab1$schema, schema(x = int32(), y = utf8()))
expect_equivalent(as.data.frame(tab1), df)
s <- schema(x = float64(), y = utf8())
tab3 <- Table$create(df, schema = s)
tab4 <- Table$create(!!!df, schema = s)
expect_equal(tab3, tab4)
expect_equal(tab3$schema, s)
expect_equivalent(as.data.frame(tab3), df)
})
test_that("Validation when creating table with schema (ARROW-10953)", {
expect_error(
Table$create(data.frame(), schema = schema(a = int32())),
"incompatible. schema has 1 fields, and 0 columns are supplied",
fixed = TRUE
)
expect_error(
Table$create(data.frame(b = 1), schema = schema(a = int32())),
"field at index 1 has name 'a' != 'b'",
fixed = TRUE
)
expect_error(
Table$create(data.frame(b = 2, c = 3), schema = schema(a = int32())),
"incompatible. schema has 1 fields, and 2 columns are supplied",
fixed = TRUE
)
})
test_that("==.Table", {
tab1 <- Table$create(x = 1:2, y = c("a", "b"))
tab2 <- Table$create(x = 1:2, y = c("a", "b"))
tab3 <- Table$create(x = 1:2)
tab4 <- Table$create(x = 1:2, y = c("a", "b"), z = 3:4)
expect_true(tab1 == tab2)
expect_true(tab2 == tab1)
expect_false(tab1 == tab3)
expect_false(tab3 == tab1)
expect_false(tab1 == tab4)
expect_false(tab4 == tab1)
expect_true(all.equal(tab1, tab2))
expect_equal(tab1, tab2)
})
test_that("Table$Equals(check_metadata)", {
tab1 <- Table$create(x = 1:2, y = c("a", "b"))
tab2 <- Table$create(x = 1:2, y = c("a", "b"),
schema = tab1$schema$WithMetadata(list(some="metadata")))
expect_r6_class(tab1, "Table")
expect_r6_class(tab2, "Table")
expect_false(tab1$schema$HasMetadata)
expect_true(tab2$schema$HasMetadata)
expect_identical(tab2$schema$metadata, list(some = "metadata"))
expect_true(tab1 == tab2)
expect_true(tab1$Equals(tab2))
expect_false(tab1$Equals(tab2, check_metadata = TRUE))
expect_failure(expect_equal(tab1, tab2)) # expect_equal has check_metadata=TRUE
expect_equivalent(tab1, tab2) # expect_equivalent has check_metadata=FALSE
expect_false(tab1$Equals(24)) # Not a Table
})
test_that("Table handles null type (ARROW-7064)", {
tab <- Table$create(a = 1:10, n = vctrs::unspecified(10))
expect_equivalent(tab$schema, schema(a = int32(), n = null()))
})
test_that("Can create table with specific dictionary types", {
fact <- example_data[,"fct"]
int_types <- c(int8(), int16(), int32(), int64())
# TODO: test uint types when format allows
# uint_types <- c(uint8(), uint16(), uint32(), uint64())
for (i in int_types) {
sch <- schema(fct = dictionary(i, utf8()))
tab <- Table$create(fact, schema = sch)
expect_equal(sch, tab$schema)
if (i != int64()) {
# TODO: same downcast to int32 as we do for int64() type elsewhere
expect_identical(as.data.frame(tab), fact)
}
}
})
test_that("Table unifies dictionary on conversion back to R (ARROW-8374)", {
b1 <- record_batch(f = factor(c("a"), levels = c("a", "b")))
b2 <- record_batch(f = factor(c("c"), levels = c("c", "d")))
b3 <- record_batch(f = factor(NA, levels = "a"))
b4 <- record_batch(f = factor())
res <- tibble::tibble(f = factor(c("a", "c", NA), levels = c("a", "b", "c", "d")))
tab <- Table$create(b1, b2, b3, b4)
expect_identical(as.data.frame(tab), res)
})
test_that("Table$SelectColumns()", {
tab <- Table$create(x = 1:10, y = 1:10)
expect_equal(tab$SelectColumns(0L), Table$create(x = 1:10))
expect_error(tab$SelectColumns(2:4))
expect_error(tab$SelectColumns(""))
})
test_that("Table name assignment", {
tab <- Table$create(x = 1:10, y = 1:10)
expect_identical(names(tab), c("x", "y"))
names(tab) <- c("a", "b")
expect_identical(names(tab), c("a", "b"))
expect_error(names(tab) <- "f")
expect_error(names(tab) <- letters)
expect_error(names(tab) <- character(0))
expect_error(names(tab) <- NULL)
expect_error(names(tab) <- c(TRUE, FALSE))
})
test_that("Table$create() with different length columns", {
msg <- "All columns must have the same length"
expect_error(Table$create(a=1:5, b = 42), msg)
expect_error(Table$create(a=1:5, b = 1:6), msg)
})