# blob: b0b508bbc4b0ab373b3cf6752fce7e4eaab87001 [file] [log] [blame]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Groups the tests below under the "JsonTableReader" label.
# NOTE(review): context() is deprecated in testthat 3rd edition -- confirm
# whether this call can be dropped for the testthat version in use.
context("JsonTableReader")
# Writes four JSON rows with scalar fields to a temporary file, reads them
# with as_data_frame = FALSE from a path, a memory map and a ReadableFile,
# and checks the schema and the converted column values (ARROW-5503).
test_that("Can read json file with scalars columns (ARROW-5503)", {
  json_path <- tempfile()
  on.exit(unlink(json_path))
  json_lines <- '
{ "hello": 3.5, "world": false, "yo": "thing" }
{ "hello": 3.25, "world": null }
{ "hello": 3.125, "world": null, "yo": "\u5fcd" }
{ "hello": 0.0, "world": true, "yo": null }
'
  writeLines(json_lines, json_path, useBytes = TRUE)

  # The same file read through three different input sources.
  from_path <- read_json_arrow(json_path, as_data_frame = FALSE)
  from_mmap <- read_json_arrow(mmap_open(json_path), as_data_frame = FALSE)
  from_file <- read_json_arrow(
    ReadableFile$create(json_path),
    as_data_frame = FALSE
  )
  expect_equal(from_path, from_mmap)
  expect_equal(from_path, from_file)

  expected_schema <- schema(
    hello = float64(),
    world = boolean(),
    yo = utf8()
  )
  expect_equal(from_path$schema, expected_schema)

  # Fields that are missing or null in the JSON are expected to come back
  # as NA.
  df <- as.data.frame(from_path)
  expect_equal(df$hello, c(3.5, 3.25, 3.125, 0))
  expect_equal(df$world, c(FALSE, NA, NA, TRUE))
  expect_equal(df$yo, c("thing", NA, "\u5fcd", NA))
})
# Reads the scalar-column JSON file with the default arguments from a path,
# a memory map and a ReadableFile, and checks that each result is a tibble
# ("tbl_df") holding the expected values.
test_that("read_json_arrow() converts to tibble", {
  json_path <- tempfile()
  on.exit(unlink(json_path))
  json_lines <- '
{ "hello": 3.5, "world": false, "yo": "thing" }
{ "hello": 3.25, "world": null }
{ "hello": 3.125, "world": null, "yo": "\u5fcd" }
{ "hello": 0.0, "world": true, "yo": null }
'
  writeLines(json_lines, json_path, useBytes = TRUE)

  from_path <- read_json_arrow(json_path)
  from_mmap <- read_json_arrow(mmap_open(json_path))
  from_file <- read_json_arrow(ReadableFile$create(json_path))

  # Every input source must yield a tibble.
  expect_s3_class(from_path, "tbl_df")
  expect_s3_class(from_mmap, "tbl_df")
  expect_s3_class(from_file, "tbl_df")

  # All three sources must agree with each other.
  expect_equal(from_path, from_mmap)
  expect_equal(from_path, from_file)

  expect_equal(from_path$hello, c(3.5, 3.25, 3.125, 0))
  expect_equal(from_path$world, c(FALSE, NA, NA, TRUE))
  expect_equal(from_path$yo, c("thing", NA, "\u5fcd", NA))
})
# Checks that `col_select` restricts the columns returned by
# read_json_arrow(), both with bare column names and with integer positions.
test_that("read_json_arrow() supports col_select=", {
  tf <- tempfile()
  # Remove the temporary file when the test exits, as the other tests in this
  # file do; without this the file is left behind in the session temp dir.
  on.exit(unlink(tf))
  # useBytes = TRUE writes the "\u5fcd" character as-is instead of re-encoding
  # it to the native encoding, matching the other tests that write this
  # payload.
  writeLines('
{ "hello": 3.5, "world": false, "yo": "thing" }
{ "hello": 3.25, "world": null }
{ "hello": 3.125, "world": null, "yo": "\u5fcd" }
{ "hello": 0.0, "world": true, "yo": null }
', tf, useBytes = TRUE)

  # Selection by bare column names.
  tab1 <- read_json_arrow(tf, col_select = c(hello, world))
  expect_equal(names(tab1), c("hello", "world"))

  # Selection by column position.
  tab2 <- read_json_arrow(tf, col_select = 1:2)
  expect_equal(names(tab2), c("hello", "world"))
})
# Reads JSON rows holding an array field ("arr") and an object field ("nuf"),
# then checks the schema, the struct array accessors, and the conversions to
# R vectors and data frames (ARROW-5503).
test_that("Can read json file with nested columns (ARROW-5503)", {
  json_path <- tempfile()
  on.exit(unlink(json_path))
  json_lines <- '
{ "arr": [1.0, 2.0, 3.0], "nuf": {} }
{ "arr": [2.0], "nuf": null }
{ "arr": [], "nuf": { "ps": 78.0, "hello": "hi" } }
{ "arr": null, "nuf": { "ps": 90.0, "hello": "bonjour" } }
{ "arr": [5.0], "nuf": { "hello": "ciao" } }
{ "arr": [5.0, 6.0], "nuf": { "ps": 19 } }
'
  writeLines(json_lines, json_path)

  # The same file read through three different input sources.
  from_path <- read_json_arrow(json_path, as_data_frame = FALSE)
  from_mmap <- read_json_arrow(mmap_open(json_path), as_data_frame = FALSE)
  from_file <- read_json_arrow(
    ReadableFile$create(json_path),
    as_data_frame = FALSE
  )
  expect_equal(from_path, from_mmap)
  expect_equal(from_path, from_file)

  nuf_type <- struct(ps = float64(), hello = utf8())
  expect_equal(
    from_path$schema,
    schema(arr = list_of(float64()), nuf = nuf_type)
  )

  # Struct column: compare its child arrays by index, by name, and flattened.
  nuf_chunk <- from_path$column(1)$chunk(0)
  ps_expected <- Array$create(c(NA, NA, 78, 90, NA, 19))
  hello_expected <- Array$create(c(NA, NA, "hi", "bonjour", "ciao", NA))
  expect_equal(nuf_chunk$field(0L), ps_expected)
  expect_equal(nuf_chunk$GetFieldByName("ps"), ps_expected)
  expect_equal(nuf_chunk$Flatten(), list(ps_expected, hello_expected))

  # R-side representation of the struct column, reused further down.
  nuf_tbl <- tibble::tibble(
    ps = ps_expected$as_vector(),
    hello = hello_expected$as_vector()
  )
  expect_equal(as.vector(nuf_chunk), nuf_tbl)

  # List column: one R element per JSON row.
  arr_expected <- list(c(1, 2, 3), 2, numeric(0), NULL, 5, c(5, 6))
  expect_equivalent(from_path$column(0)$as_vector(), arr_expected)

  # Whole-table conversion combines both nested columns.
  expect_equivalent(
    as.data.frame(from_path),
    tibble::tibble(arr = arr_expected, nuf = nuf_tbl)
  )
})
# Reads two JSON rows whose "a" field is an array of objects and checks that
# the result is equivalent to a tibble with a list column of tibbles
# (ARROW-7740).
test_that("Can read json file with list<struct<T...>> nested columns (ARROW-7740)", {
  json_path <- tempfile()
  on.exit(unlink(json_path))
  json_lines <- '
{"a":[{"b":1.0},{"b":2.0}]}
{"a":[{"b":1.0},{"b":2.0}]}
'
  writeLines(json_lines, json_path)

  # Both rows carry the same two-element array, so the same tibble is
  # expected twice.
  row_value <- tibble::tibble(b = c(1, 2))
  expected <- tibble::tibble(a = list(row_value, row_value))
  expect_equivalent(read_json_arrow(json_path), expected)
})