ARROW-11468: [R] Allow user to pass schema to read_json_arrow()

A couple of things I wanted to check are expected behaviour:

1. If I specify in the schema that a numeric column should be a string column, I get the error `Error: Invalid: JSON parse error: Column(/third_col) changed from string to number in row 0`
 (e.g. if I run the following)
```
tf <- tempfile()
writeLines('
    { "hello": 3.5, "world": 2, "third_col": 99}
    { "hello": 3.25, "world": 5, "third_col": 98}
    { "hello": 3.125, "world": 8, "third_col": 97 }
    { "hello": 0.0, "world": 10, "third_col": 96}
', tf)
read_json_arrow(tf, schema = schema(third_col = utf8(), world = float64()))
```
2. As can be seen in the tests output (will delete the `print` statements before this is merged), table columns are returned in the order specified in the schema and then the columns not mentioned in the schema.

Closes #9950 from thisisnic/ARROW-11468

Authored-by: Nic Crane <thisisnic@gmail.com>
Signed-off-by: Neal Richardson <neal.p.richardson@gmail.com>
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 9811dc9..a33cf22 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -1052,8 +1052,12 @@
     .Call(`_arrow_json___ReadOptions__initialize`, use_threads, block_size)
 }
 
-json___ParseOptions__initialize <- function(newlines_in_values){
-    .Call(`_arrow_json___ParseOptions__initialize`, newlines_in_values)
+json___ParseOptions__initialize1 <- function(newlines_in_values){
+    .Call(`_arrow_json___ParseOptions__initialize1`, newlines_in_values)
+}
+
+json___ParseOptions__initialize2 <- function(newlines_in_values, explicit_schema){
+    .Call(`_arrow_json___ParseOptions__initialize2`, newlines_in_values, explicit_schema)
 }
 
 json___TableReader__Make <- function(input, read_options, parse_options){
diff --git a/r/R/json.R b/r/R/json.R
index cc16774..89595a5 100644
--- a/r/R/json.R
+++ b/r/R/json.R
@@ -20,6 +20,7 @@
 #' Using [JsonTableReader]
 #'
 #' @inheritParams read_delim_arrow
+#' @param schema [Schema] that describes the table.
 #' @param ... Additional options passed to `JsonTableReader$create()`
 #'
 #' @return A `data.frame`, or a Table if `as_data_frame = FALSE`.
@@ -38,12 +39,13 @@
 read_json_arrow <- function(file,
                             col_select = NULL,
                             as_data_frame = TRUE,
+                            schema = NULL,
                             ...) {
   if (!inherits(file, "InputStream")) {
     file <- make_readable_file(file)
     on.exit(file$close())
   }
-  tab <- JsonTableReader$create(file, ...)$Read()
+  tab <- JsonTableReader$create(file, schema = schema, ...)$Read()
 
   col_select <- enquo(col_select)
   if (!quo_is_null(col_select)) {
@@ -69,7 +71,8 @@
 )
 JsonTableReader$create <- function(file,
                                    read_options = JsonReadOptions$create(),
-                                   parse_options = JsonParseOptions$create(),
+                                   parse_options = JsonParseOptions$create(schema = schema),
+                                   schema = NULL,
                                    ...) {
   assert_is(file, "InputStream")
   json___TableReader__Make(file, read_options, parse_options)
@@ -91,6 +94,11 @@
 #' @docType class
 #' @export
 JsonParseOptions <- R6Class("JsonParseOptions", inherit = ArrowObject)
-JsonParseOptions$create <- function(newlines_in_values = FALSE) {
-  json___ParseOptions__initialize(newlines_in_values)
+JsonParseOptions$create <- function(newlines_in_values = FALSE, schema = NULL) {
+  if (is.null(schema)) {
+    json___ParseOptions__initialize1(newlines_in_values)
+  } else {
+    json___ParseOptions__initialize2(newlines_in_values, schema)
+  }
+  
 }
diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd
index 8111891..83765b2 100644
--- a/r/man/read_json_arrow.Rd
+++ b/r/man/read_json_arrow.Rd
@@ -4,7 +4,13 @@
 \alias{read_json_arrow}
 \title{Read a JSON file}
 \usage{
-read_json_arrow(file, col_select = NULL, as_data_frame = TRUE, ...)
+read_json_arrow(
+  file,
+  col_select = NULL,
+  as_data_frame = TRUE,
+  schema = NULL,
+  ...
+)
 }
 \arguments{
 \item{file}{A character file name or URI, \code{raw} vector, an Arrow input stream,
@@ -22,6 +28,8 @@
 \item{as_data_frame}{Should the function return a \code{data.frame} (default) or
 an Arrow \link{Table}?}
 
+\item{schema}{\link{Schema} that describes the table.}
+
 \item{...}{Additional options passed to \code{JsonTableReader$create()}}
 }
 \value{
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 4c2ebed..6dc50c3 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -4115,16 +4115,32 @@
 
 // json.cpp
 #if defined(ARROW_R_WITH_ARROW)
-std::shared_ptr<arrow::json::ParseOptions> json___ParseOptions__initialize(bool newlines_in_values);
-extern "C" SEXP _arrow_json___ParseOptions__initialize(SEXP newlines_in_values_sexp){
+std::shared_ptr<arrow::json::ParseOptions> json___ParseOptions__initialize1(bool newlines_in_values);
+extern "C" SEXP _arrow_json___ParseOptions__initialize1(SEXP newlines_in_values_sexp){
 BEGIN_CPP11
 	arrow::r::Input<bool>::type newlines_in_values(newlines_in_values_sexp);
-	return cpp11::as_sexp(json___ParseOptions__initialize(newlines_in_values));
+	return cpp11::as_sexp(json___ParseOptions__initialize1(newlines_in_values));
 END_CPP11
 }
 #else
-extern "C" SEXP _arrow_json___ParseOptions__initialize(SEXP newlines_in_values_sexp){
-	Rf_error("Cannot call json___ParseOptions__initialize(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+extern "C" SEXP _arrow_json___ParseOptions__initialize1(SEXP newlines_in_values_sexp){
+	Rf_error("Cannot call json___ParseOptions__initialize1(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
+}
+#endif
+
+// json.cpp
+#if defined(ARROW_R_WITH_ARROW)
+std::shared_ptr<arrow::json::ParseOptions> json___ParseOptions__initialize2(bool newlines_in_values, const std::shared_ptr<arrow::Schema>& explicit_schema);
+extern "C" SEXP _arrow_json___ParseOptions__initialize2(SEXP newlines_in_values_sexp, SEXP explicit_schema_sexp){
+BEGIN_CPP11
+	arrow::r::Input<bool>::type newlines_in_values(newlines_in_values_sexp);
+	arrow::r::Input<const std::shared_ptr<arrow::Schema>&>::type explicit_schema(explicit_schema_sexp);
+	return cpp11::as_sexp(json___ParseOptions__initialize2(newlines_in_values, explicit_schema));
+END_CPP11
+}
+#else
+extern "C" SEXP _arrow_json___ParseOptions__initialize2(SEXP newlines_in_values_sexp, SEXP explicit_schema_sexp){
+	Rf_error("Cannot call json___ParseOptions__initialize2(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. ");
 }
 #endif
 
@@ -6835,7 +6851,8 @@
 		{ "_arrow_io___BufferOutputStream__Tell", (DL_FUNC) &_arrow_io___BufferOutputStream__Tell, 1}, 
 		{ "_arrow_io___BufferOutputStream__Write", (DL_FUNC) &_arrow_io___BufferOutputStream__Write, 2}, 
 		{ "_arrow_json___ReadOptions__initialize", (DL_FUNC) &_arrow_json___ReadOptions__initialize, 2}, 
-		{ "_arrow_json___ParseOptions__initialize", (DL_FUNC) &_arrow_json___ParseOptions__initialize, 1}, 
+		{ "_arrow_json___ParseOptions__initialize1", (DL_FUNC) &_arrow_json___ParseOptions__initialize1, 1}, 
+		{ "_arrow_json___ParseOptions__initialize2", (DL_FUNC) &_arrow_json___ParseOptions__initialize2, 2}, 
 		{ "_arrow_json___TableReader__Make", (DL_FUNC) &_arrow_json___TableReader__Make, 3}, 
 		{ "_arrow_json___TableReader__Read", (DL_FUNC) &_arrow_json___TableReader__Read, 1}, 
 		{ "_arrow_MemoryPool__default", (DL_FUNC) &_arrow_MemoryPool__default, 0}, 
diff --git a/r/src/json.cpp b/r/src/json.cpp
index 87d4062..edc5e07 100644
--- a/r/src/json.cpp
+++ b/r/src/json.cpp
@@ -31,7 +31,7 @@
 }
 
 // [[arrow::export]]
-std::shared_ptr<arrow::json::ParseOptions> json___ParseOptions__initialize(
+std::shared_ptr<arrow::json::ParseOptions> json___ParseOptions__initialize1(
     bool newlines_in_values) {
   auto res =
       std::make_shared<arrow::json::ParseOptions>(arrow::json::ParseOptions::Defaults());
@@ -40,6 +40,16 @@
 }
 
 // [[arrow::export]]
+std::shared_ptr<arrow::json::ParseOptions> json___ParseOptions__initialize2(
+    bool newlines_in_values, const std::shared_ptr<arrow::Schema>& explicit_schema) {
+  auto res =
+      std::make_shared<arrow::json::ParseOptions>(arrow::json::ParseOptions::Defaults());
+  res->newlines_in_values = newlines_in_values;
+  res->explicit_schema = explicit_schema;
+  return res;
+}
+
+// [[arrow::export]]
 std::shared_ptr<arrow::json::TableReader> json___TableReader__Make(
     const std::shared_ptr<arrow::io::InputStream>& input,
     const std::shared_ptr<arrow::json::ReadOptions>& read_options,
diff --git a/r/tests/testthat/test-json.R b/r/tests/testthat/test-json.R
index b0b508b..ad5ff8a 100644
--- a/r/tests/testthat/test-json.R
+++ b/r/tests/testthat/test-json.R
@@ -86,6 +86,94 @@
   expect_equal(names(tab2), c("hello", "world"))
 })
 
+test_that("read_json_arrow(schema=) with empty schema", {
+  tf <- tempfile()
+  writeLines('
+    { "hello": 3.5, "world": 2, "third_col": 99}
+    { "hello": 3.25, "world": 5, "third_col": 98}
+    { "hello": 3.125, "world": 8, "third_col": 97 }
+    { "hello": 0.0, "world": 10, "third_col": 96}
+  ', tf)
+  
+  tab1 <- read_json_arrow(tf, schema = schema())
+  
+  expect_identical(
+    tab1, 
+    tibble::tibble(
+      hello = c(3.5, 3.25, 3.125, 0),
+      world = c(2L, 5L, 8L, 10L),
+      third_col = c(99L,98L,97L,96L)
+    )               
+  )
+})
+
+test_that("read_json_arrow(schema=) with partial schema", {
+  tf <- tempfile()
+  writeLines('
+    { "hello": 3.5, "world": 2, "third_col": 99}
+    { "hello": 3.25, "world": 5, "third_col": 98}
+    { "hello": 3.125, "world": 8, "third_col": 97 }
+    { "hello": 0.0, "world": 10, "third_col": 96}
+  ', tf)
+  
+  tab1 <- read_json_arrow(tf, schema = schema(third_col = float64(), world = float64()))
+  
+  expect_identical(
+    tab1, 
+    tibble::tibble(
+      third_col = c(99,98,97,96),
+      world = c(2, 5, 8, 10),
+      hello = c(3.5, 3.25, 3.125, 0)
+    )               
+  )
+  
+  tf2 <- tempfile()
+  writeLines('
+    { "hello": 3.5, "world": 2, "third_col": "99"}
+    { "hello": 3.25, "world": 5, "third_col": "98"}
+    { "hello": 3.125, "world": 8, "third_col": "97"}
+  ', tf2)
+  
+  tab2 <- read_json_arrow(tf2, schema = schema(third_col = string(), world = float64()))
+  
+  expect_identical(
+    tab2, 
+    tibble::tibble(
+      third_col = c("99","98","97"),
+      world = c(2, 5, 8),
+      hello = c(3.5, 3.25, 3.125)
+    )               
+  )
+})
+
+test_that("read_json_arrow(schema=) with full schema", {
+  tf <- tempfile()
+  writeLines('
+    { "hello": 3.5, "world": 2, "third_col": 99}
+    { "hello": 3.25, "world": 5, "third_col": 98}
+    { "hello": 3.125, "world": 8, "third_col": 97}
+    { "hello": 0.0, "world": 10, "third_col": 96}
+  ', tf)
+  
+  tab1 <- read_json_arrow(
+    tf,
+    schema = schema(
+      hello = float64(),
+      third_col = float64(),
+      world = float64()
+    )
+  )
+  
+  expect_identical(
+    tab1, 
+    tibble::tibble(
+      hello = c(3.5, 3.25, 3.125, 0),
+      third_col = c(99,98,97,96),
+      world = c(2, 5, 8, 10)
+    )               
+  )
+})
+
 test_that("Can read json file with nested columns (ARROW-5503)", {
   tf <- tempfile()
   on.exit(unlink(tf))