 # r/tests/testthat/test-dplyr-string-functions.R - arrow - Git at Google

 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.

 # Gate on "utf8proc" before any test runs; presumably this skips the tests in
 # this file when that feature is unavailable -- confirm against the helper.
 skip_if_not_available("utf8proc")

 # dplyr verbs and stringr helpers are called unqualified in the tests below
 library(dplyr)
 library(stringr)

 test_that("grepl with ignore.case = FALSE and fixed = TRUE", {
   # Only "Foo" contains the literal substring "o".
   strings <- tibble(x = c("Foo", "bar"))

   expect_dplyr_equal(
     input %>%
       filter(grepl("o", x, fixed = TRUE)) %>%
       collect(),
     strings
   )
 })

 test_that("sub and gsub with ignore.case = FALSE and fixed = TRUE", {
   strings <- tibble(x = c("Foo", "bar"))

   # sub(): replace the literal string "Foo"
   expect_dplyr_equal(
     input %>%
       transmute(x = sub("Foo", "baz", x, fixed = TRUE)) %>%
       collect(),
     strings
   )

   # gsub(): replace every literal "o"
   expect_dplyr_equal(
     input %>%
       transmute(x = gsub("o", "u", x, fixed = TRUE)) %>%
       collect(),
     strings
   )
 })

 # Many of the remaining tests require RE2; presumably this call skips them
 # when "re2" is unavailable -- confirm against the helper.
 skip_if_not_available("re2")

 test_that("grepl", {
   strings <- tibble(x = c("Foo", "bar"))

   # Run every expectation twice: once with fixed = TRUE and once with
   # fixed = FALSE.
   for (is_fixed in c(TRUE, FALSE)) {
     # ignore.case left at its default
     expect_dplyr_equal(
       input %>%
         filter(grepl("Foo", x, fixed = is_fixed)) %>%
         collect(),
       strings
     )
     # result kept as a logical column instead of being used as a filter
     expect_dplyr_equal(
       input %>%
         transmute(x = grepl("^B.+", x, ignore.case = FALSE, fixed = is_fixed)) %>%
         collect(),
       strings
     )
     # ignore.case = FALSE spelled out
     expect_dplyr_equal(
       input %>%
         filter(grepl("Foo", x, ignore.case = FALSE, fixed = is_fixed)) %>%
         collect(),
       strings
     )
   }
 })

 test_that("grepl with ignore.case = TRUE and fixed = TRUE", {
   df <- tibble(x = c("Foo", "bar"))

   # base::grepl() ignores ignore.case = TRUE with a warning when fixed = TRUE,
   # so we can't use expect_dplyr_equal() for these tests
   expect_equal(
     df %>%
       Table$create() %>%
       filter(grepl("O", x, ignore.case = TRUE, fixed = TRUE)) %>%
       collect(),
     tibble(x = "Foo")
   )

   # With fixed = TRUE the pattern "^B.+" is a literal string that occurs in
   # neither value, so no rows are expected. The predicate is passed unnamed:
   # filter() conditions are positional, and dplyr::filter() rejects named
   # inputs such as `x = ...`.
   expect_equal(
     df %>%
       Table$create() %>%
       filter(grepl("^B.+", x, ignore.case = TRUE, fixed = TRUE)) %>%
       collect(),
     tibble(x = character(0))
   )
 })

 test_that("str_detect", {
   strings <- tibble(x = c("Foo", "bar"))

   # regex() patterns: case-sensitive, then case-insensitive, then negated
   expect_dplyr_equal(
     input %>%
       filter(str_detect(x, regex("^F"))) %>%
       collect(),
     strings
   )
   expect_dplyr_equal(
     input %>%
       transmute(x = str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE))) %>%
       collect(),
     strings
   )
   expect_dplyr_equal(
     input %>%
       transmute(x = str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE), negate = TRUE)) %>%
       collect(),
     strings
   )

   # fixed() patterns: lower-case "o", upper-case "O", then "O" with
   # ignore_case = TRUE, plain and negated
   expect_dplyr_equal(
     input %>%
       filter(str_detect(x, fixed("o"))) %>%
       collect(),
     strings
   )
   expect_dplyr_equal(
     input %>%
       filter(str_detect(x, fixed("O"))) %>%
       collect(),
     strings
   )
   expect_dplyr_equal(
     input %>%
       filter(str_detect(x, fixed("O", ignore_case = TRUE))) %>%
       collect(),
     strings
   )
   expect_dplyr_equal(
     input %>%
       filter(str_detect(x, fixed("O", ignore_case = TRUE), negate = TRUE)) %>%
       collect(),
     strings
   )
 })

 test_that("sub and gsub", {
   df <- tibble(x = c("Foo", "bar"))

   # Run every expectation twice: once with fixed = TRUE and once with
   # fixed = FALSE.
   for (fixed in c(TRUE, FALSE)) {

     expect_dplyr_equal(
       input %>%
         transmute(x = sub("Foo", "baz", x, fixed = fixed)) %>%
         collect(),
       df
     )
     expect_dplyr_equal(
       input %>%
         transmute(x = sub("^B.+", "baz", x, ignore.case = FALSE, fixed = fixed)) %>%
         collect(),
       df
     )
     expect_dplyr_equal(
       input %>%
         transmute(x = sub("Foo", "baz", x, ignore.case = FALSE, fixed = fixed)) %>%
         collect(),
       df
     )
     # gsub() is named in the test title, so exercise it in both modes too;
     # "o" contains no regex metacharacters, so it reads the same either way
     expect_dplyr_equal(
       input %>%
         transmute(x = gsub("o", "u", x, fixed = fixed)) %>%
         collect(),
       df
     )

   }
 })

 test_that("sub and gsub with ignore.case = TRUE and fixed = TRUE", {
   strings <- tibble(x = c("Foo", "bar"))
   tab <- Table$create(strings)

   # base::sub() and base::gsub() warn and disregard ignore.case = TRUE when
   # fixed = TRUE, so expect_dplyr_equal() can't be used here; the expected
   # results are written out by hand instead.
   expect_equal(
     tab %>%
       transmute(x = sub("O", "u", x, ignore.case = TRUE, fixed = TRUE)) %>%
       collect(),
     tibble(x = c("Fuo", "bar"))
   )
   expect_equal(
     tab %>%
       transmute(x = gsub("o", "u", x, ignore.case = TRUE, fixed = TRUE)) %>%
       collect(),
     tibble(x = c("Fuu", "bar"))
   )
   # the input is expected back unchanged
   expect_equal(
     tab %>%
       transmute(x = sub("^B.+", "baz", x, ignore.case = TRUE, fixed = TRUE)) %>%
       collect(),
     strings
   )
 })

 test_that("str_replace and str_replace_all", {
   strings <- tibble(x = c("Foo", "bar"))

   # bare string pattern and the same pattern wrapped in regex()
   expect_dplyr_equal(
     input %>%
       transmute(x = str_replace_all(x, "^F", "baz")) %>%
       collect(),
     strings
   )
   expect_dplyr_equal(
     input %>%
       transmute(x = str_replace_all(x, regex("^F"), "baz")) %>%
       collect(),
     strings
   )

   # str_replace() via mutate(), then with a case-insensitive regex()
   expect_dplyr_equal(
     input %>%
       mutate(x = str_replace(x, "^F[a-z]{2}", "baz")) %>%
       collect(),
     strings
   )
   expect_dplyr_equal(
     input %>%
       transmute(x = str_replace(x, regex("^f[A-Z]{2}", ignore_case = TRUE), "baz")) %>%
       collect(),
     strings
   )

   # fixed() patterns, with and without ignore_case
   expect_dplyr_equal(
     input %>%
       transmute(x = str_replace_all(x, fixed("o"), "u")) %>%
       collect(),
     strings
   )
   expect_dplyr_equal(
     input %>%
       transmute(x = str_replace(x, fixed("O"), "u")) %>%
       collect(),
     strings
   )
   expect_dplyr_equal(
     input %>%
       transmute(x = str_replace(x, fixed("O", ignore_case = TRUE), "u")) %>%
       collect(),
     strings
   )
 })

 test_that("strsplit and str_split", {
   phrases <- tibble(x = c("Foo and bar", "baz and qux and quux"))

   # base::strsplit(): default arguments, then fixed = TRUE with a pattern
   # containing regex metacharacters
   expect_dplyr_equal(
     input %>%
       mutate(x = strsplit(x, "and")) %>%
       collect(),
     phrases
   )
   expect_dplyr_equal(
     input %>%
       mutate(x = strsplit(x, "and.*", fixed = TRUE)) %>%
       collect(),
     phrases
   )

   # stringr::str_split(): bare pattern, then n = 2 with a bare, fixed() and
   # regex() pattern
   expect_dplyr_equal(
     input %>%
       mutate(x = str_split(x, "and")) %>%
       collect(),
     phrases
   )
   expect_dplyr_equal(
     input %>%
       mutate(x = str_split(x, "and", n = 2)) %>%
       collect(),
     phrases
   )
   expect_dplyr_equal(
     input %>%
       mutate(x = str_split(x, fixed("and"), n = 2)) %>%
       collect(),
     phrases
   )
   expect_dplyr_equal(
     input %>%
       mutate(x = str_split(x, regex("and"), n = 2)) %>%
       collect(),
     phrases
   )
 })

 test_that("arrow_*_split_whitespace functions", {
   # input separated by ASCII whitespace characters only
   ascii_tab <- Table$create(
     tibble(x = c("Foo\nand bar", "baz\tand qux and quux"))
   )

   # input separated by non-ASCII whitespace characters only
   utf8_tab <- Table$create(
     tibble(x = c("Foo\u00A0and\u2000bar", "baz\u2006and\u1680qux\u3000and\u2008quux"))
   )

   # expected result of splitting either input with the default options
   words <- tibble(x = list(c("Foo", "and", "bar"), c("baz", "and", "qux", "and", "quux")))

   # default option values
   expect_equivalent(
     ascii_tab %>%
       mutate(x = arrow_ascii_split_whitespace(x)) %>%
       collect(),
     words
   )
   expect_equivalent(
     utf8_tab %>%
       mutate(x = arrow_utf8_split_whitespace(x)) %>%
       collect(),
     words
   )

   # non-default option values
   expect_equivalent(
     ascii_tab %>%
       mutate(
         x = arrow_ascii_split_whitespace(x, options = list(max_splits = 1, reverse = TRUE))
       ) %>%
       collect(),
     tibble(x = list(c("Foo\nand", "bar"), c("baz\tand qux and", "quux")))
   )
   expect_equivalent(
     utf8_tab %>%
       mutate(
         x = arrow_utf8_split_whitespace(x, options = list(max_splits = 1, reverse = TRUE))
       ) %>%
       collect(),
     tibble(x = list(c("Foo\u00A0and", "bar"), c("baz\u2006and\u1680qux\u3000and", "quux")))
   )
 })

 test_that("errors and warnings in string splitting", {
   df <- tibble(x = c("Foo and bar", "baz and qux and quux"))

   # These conditions generate an error, but abandon_ship() catches the error,
   # issues a warning, and pulls the data into R
   expect_warning(
     df %>%
       Table$create() %>%
       mutate(x = strsplit(x, "and.*", fixed = FALSE)) %>%
       collect(),
     regexp = "not supported"
   )
   # Pin the message here as well: without `regexp` any warning at all would
   # satisfy this expectation, unlike its siblings.
   expect_warning(
     df %>%
       Table$create() %>%
       mutate(x = str_split(x, "and.?")) %>%
       collect(),
     regexp = "not supported"
   )
   expect_warning(
     df %>%
       Table$create() %>%
       mutate(x = str_split(x, regex("and.?"), n = 2)) %>%
       collect(),
     regexp = "not supported"
   )
   expect_warning(
     df %>%
       Table$create() %>%
       mutate(x = str_split(x, fixed("and", ignore_case = TRUE))) %>%
       collect(),
     "not supported"
   )
   expect_warning(
     df %>%
       Table$create() %>%
       mutate(x = str_split(x, coll("and.?"))) %>%
       collect(),
     regexp = "not supported"
   )
   expect_warning(
     df %>%
       Table$create() %>%
       mutate(x = str_split(x, boundary(type = "word"))) %>%
       collect(),
     regexp = "not supported"
   )
   expect_warning(
     df %>%
       Table$create() %>%
       mutate(x = str_split(x, "and", n = 0)) %>%
       collect(),
     regexp = "not supported"
   )

   # This condition generates a warning
   expect_warning(
     df %>%
       Table$create() %>%
       mutate(x = str_split(x, fixed("and"), simplify = TRUE)) %>%
       collect(),
     "ignored"
   )

 })

 test_that("errors and warnings in string detection and replacement", {
   tab <- Table$create(tibble(x = c("Foo", "bar")))

   # Both of these raise an error that abandon_ship() catches and turns into a
   # warning before pulling the data into R
   expect_warning(
     tab %>%
       filter(str_detect(x, boundary(type = "character"))) %>%
       collect(),
     regexp = "not implemented"
   )
   expect_warning(
     tab %>%
       mutate(x = str_replace_all(x, coll("o", locale = "en"), "ó")) %>%
       collect(),
     regexp = "not supported"
   )

   # This one produces a warning directly; note there is no collect() step
   expect_warning(
     tab %>%
       transmute(x = str_replace_all(x, regex("o", multiline = TRUE), "u")),
     regexp = "Ignoring pattern modifier argument not supported in Arrow: \"multiline\""
   )
 })

 test_that("backreferences in pattern in string detection", {
   # Unconditionally skipped; the reason is given in the skip message.
   skip("RE2 does not support backreferences in pattern (https://github.com/google/re2/issues/101)")

   strings <- tibble(x = c("Foo", "bar"))

   # "\\1" refers back to the vowel captured by the first group
   expect_dplyr_equal(
     input %>%
       filter(str_detect(x, regex("F([aeiou])\\1"))) %>%
       collect(),
     strings
   )
 })

 test_that("backreferences (substitutions) in string replacement", {
   df <- tibble(x = c("Foo", "bar"))

   # base::sub() with two capture groups referenced in the replacement
   expect_dplyr_equal(
     input %>%
       transmute(
         desc = sub(
           "(?:https?|ftp)://([^/\r\n]+)(/[^\r\n]*)?",
           "path `\\2` on server `\\1`",
           url
         )
       ) %>%
       collect(),
     tibble(url = "https://arrow.apache.org/docs/r/")
   )

   # stringr::str_replace() with a bare pattern and with regex()
   expect_dplyr_equal(
     input %>%
       transmute(x = str_replace(x, "^(\\w)o(.*)", "\\1\\2p")) %>%
       collect(),
     df
   )
   expect_dplyr_equal(
     input %>%
       transmute(x = str_replace(x, regex("^(\\w)o(.*)", ignore_case = TRUE), "\\1\\2p")) %>%
       collect(),
     df
   )
   # Upper-case "O" in the pattern, so the match on "Foo" depends on
   # ignore_case = TRUE (this expectation used to be an exact copy of the
   # previous one, which matches even case-sensitively)
   expect_dplyr_equal(
     input %>%
       transmute(x = str_replace(x, regex("^(\\w)O(.*)", ignore_case = TRUE), "\\1\\2p")) %>%
       collect(),
     df
   )
 })

 test_that("edge cases in string detection and replacement", {
   # For case-insensitive fixed match/replace, check that "\\E" in the search
   # string and backslashes in the replacement string are taken literally.
   # expect_dplyr_equal() is not used here because base::sub() and
   # base::grepl() do not support ignore.case = TRUE when fixed = TRUE.
   escapes <- tibble(x = c("\\Q\\e\\D"))
   escapes_tab <- Table$create(escapes)

   expect_equal(
     escapes_tab %>%
       filter(grepl("\\E", x, ignore.case = TRUE, fixed = TRUE)) %>%
       collect(),
     escapes
   )
   expect_equal(
     escapes_tab %>%
       transmute(x = sub("\\E", "\\L", x, ignore.case = TRUE, fixed = TRUE)) %>%
       collect(),
     tibble(x = c("\\Q\\L\\D"))
   )

   # A "(?i)" prefix supplied by the user must not break the "(?i)" prefix
   # that is added for case-insensitive regex match/replace
   upper_abc <- tibble(x = c("ABC"))

   expect_dplyr_equal(
     input %>%
       filter(grepl("(?i)^[abc]{3}$", x, ignore.case = TRUE, fixed = FALSE)) %>%
       collect(),
     upper_abc
   )
   expect_dplyr_equal(
     input %>%
       transmute(x = sub("(?i)^[abc]{3}$", "123", x, ignore.case = TRUE, fixed = FALSE)) %>%
       collect(),
     upper_abc
   )
 })
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.

	# Gate on "utf8proc" before any test runs; presumably this skips the tests in
	# this file when that feature is unavailable -- confirm against the helper.
	skip_if_not_available("utf8proc")

	# dplyr verbs and stringr helpers are called unqualified in the tests below
	library(dplyr)
	library(stringr)

	test_that("grepl with ignore.case = FALSE and fixed = TRUE", {
	  # Only "Foo" contains the literal substring "o".
	  strings <- tibble(x = c("Foo", "bar"))

	  expect_dplyr_equal(
	    input %>%
	      filter(grepl("o", x, fixed = TRUE)) %>%
	      collect(),
	    strings
	  )
	})

	test_that("sub and gsub with ignore.case = FALSE and fixed = TRUE", {
	  strings <- tibble(x = c("Foo", "bar"))

	  # sub(): replace the literal string "Foo"
	  expect_dplyr_equal(
	    input %>%
	      transmute(x = sub("Foo", "baz", x, fixed = TRUE)) %>%
	      collect(),
	    strings
	  )

	  # gsub(): replace every literal "o"
	  expect_dplyr_equal(
	    input %>%
	      transmute(x = gsub("o", "u", x, fixed = TRUE)) %>%
	      collect(),
	    strings
	  )
	})

	# Many of the remaining tests require RE2; presumably this call skips them
	# when "re2" is unavailable -- confirm against the helper.
	skip_if_not_available("re2")

	test_that("grepl", {
	  strings <- tibble(x = c("Foo", "bar"))

	  # Run every expectation twice: once with fixed = TRUE and once with
	  # fixed = FALSE.
	  for (is_fixed in c(TRUE, FALSE)) {
	    # ignore.case left at its default
	    expect_dplyr_equal(
	      input %>%
	        filter(grepl("Foo", x, fixed = is_fixed)) %>%
	        collect(),
	      strings
	    )
	    # result kept as a logical column instead of being used as a filter
	    expect_dplyr_equal(
	      input %>%
	        transmute(x = grepl("^B.+", x, ignore.case = FALSE, fixed = is_fixed)) %>%
	        collect(),
	      strings
	    )
	    # ignore.case = FALSE spelled out
	    expect_dplyr_equal(
	      input %>%
	        filter(grepl("Foo", x, ignore.case = FALSE, fixed = is_fixed)) %>%
	        collect(),
	      strings
	    )
	  }
	})

	test_that("grepl with ignore.case = TRUE and fixed = TRUE", {
	  df <- tibble(x = c("Foo", "bar"))

	  # base::grepl() ignores ignore.case = TRUE with a warning when fixed = TRUE,
	  # so we can't use expect_dplyr_equal() for these tests
	  expect_equal(
	    df %>%
	      Table$create() %>%
	      filter(grepl("O", x, ignore.case = TRUE, fixed = TRUE)) %>%
	      collect(),
	    tibble(x = "Foo")
	  )

	  # With fixed = TRUE the pattern "^B.+" is a literal string that occurs in
	  # neither value, so no rows are expected. The predicate is passed unnamed:
	  # filter() conditions are positional, and dplyr::filter() rejects named
	  # inputs such as `x = ...`.
	  expect_equal(
	    df %>%
	      Table$create() %>%
	      filter(grepl("^B.+", x, ignore.case = TRUE, fixed = TRUE)) %>%
	      collect(),
	    tibble(x = character(0))
	  )
	})

	test_that("str_detect", {
	  strings <- tibble(x = c("Foo", "bar"))

	  # regex() patterns: case-sensitive, then case-insensitive, then negated
	  expect_dplyr_equal(
	    input %>%
	      filter(str_detect(x, regex("^F"))) %>%
	      collect(),
	    strings
	  )
	  expect_dplyr_equal(
	    input %>%
	      transmute(x = str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE))) %>%
	      collect(),
	    strings
	  )
	  expect_dplyr_equal(
	    input %>%
	      transmute(x = str_detect(x, regex("^f[A-Z]{2}", ignore_case = TRUE), negate = TRUE)) %>%
	      collect(),
	    strings
	  )

	  # fixed() patterns: lower-case "o", upper-case "O", then "O" with
	  # ignore_case = TRUE, plain and negated
	  expect_dplyr_equal(
	    input %>%
	      filter(str_detect(x, fixed("o"))) %>%
	      collect(),
	    strings
	  )
	  expect_dplyr_equal(
	    input %>%
	      filter(str_detect(x, fixed("O"))) %>%
	      collect(),
	    strings
	  )
	  expect_dplyr_equal(
	    input %>%
	      filter(str_detect(x, fixed("O", ignore_case = TRUE))) %>%
	      collect(),
	    strings
	  )
	  expect_dplyr_equal(
	    input %>%
	      filter(str_detect(x, fixed("O", ignore_case = TRUE), negate = TRUE)) %>%
	      collect(),
	    strings
	  )
	})

	test_that("sub and gsub", {
	  df <- tibble(x = c("Foo", "bar"))

	  # Run every expectation twice: once with fixed = TRUE and once with
	  # fixed = FALSE.
	  for (fixed in c(TRUE, FALSE)) {

	    expect_dplyr_equal(
	      input %>%
	        transmute(x = sub("Foo", "baz", x, fixed = fixed)) %>%
	        collect(),
	      df
	    )
	    expect_dplyr_equal(
	      input %>%
	        transmute(x = sub("^B.+", "baz", x, ignore.case = FALSE, fixed = fixed)) %>%
	        collect(),
	      df
	    )
	    expect_dplyr_equal(
	      input %>%
	        transmute(x = sub("Foo", "baz", x, ignore.case = FALSE, fixed = fixed)) %>%
	        collect(),
	      df
	    )
	    # gsub() is named in the test title, so exercise it in both modes too;
	    # "o" contains no regex metacharacters, so it reads the same either way
	    expect_dplyr_equal(
	      input %>%
	        transmute(x = gsub("o", "u", x, fixed = fixed)) %>%
	        collect(),
	      df
	    )

	  }
	})

	test_that("sub and gsub with ignore.case = TRUE and fixed = TRUE", {
	  strings <- tibble(x = c("Foo", "bar"))
	  tab <- Table$create(strings)

	  # base::sub() and base::gsub() warn and disregard ignore.case = TRUE when
	  # fixed = TRUE, so expect_dplyr_equal() can't be used here; the expected
	  # results are written out by hand instead.
	  expect_equal(
	    tab %>%
	      transmute(x = sub("O", "u", x, ignore.case = TRUE, fixed = TRUE)) %>%
	      collect(),
	    tibble(x = c("Fuo", "bar"))
	  )
	  expect_equal(
	    tab %>%
	      transmute(x = gsub("o", "u", x, ignore.case = TRUE, fixed = TRUE)) %>%
	      collect(),
	    tibble(x = c("Fuu", "bar"))
	  )
	  # the input is expected back unchanged
	  expect_equal(
	    tab %>%
	      transmute(x = sub("^B.+", "baz", x, ignore.case = TRUE, fixed = TRUE)) %>%
	      collect(),
	    strings
	  )
	})

	test_that("str_replace and str_replace_all", {
	  strings <- tibble(x = c("Foo", "bar"))

	  # bare string pattern and the same pattern wrapped in regex()
	  expect_dplyr_equal(
	    input %>%
	      transmute(x = str_replace_all(x, "^F", "baz")) %>%
	      collect(),
	    strings
	  )
	  expect_dplyr_equal(
	    input %>%
	      transmute(x = str_replace_all(x, regex("^F"), "baz")) %>%
	      collect(),
	    strings
	  )

	  # str_replace() via mutate(), then with a case-insensitive regex()
	  expect_dplyr_equal(
	    input %>%
	      mutate(x = str_replace(x, "^F[a-z]{2}", "baz")) %>%
	      collect(),
	    strings
	  )
	  expect_dplyr_equal(
	    input %>%
	      transmute(x = str_replace(x, regex("^f[A-Z]{2}", ignore_case = TRUE), "baz")) %>%
	      collect(),
	    strings
	  )

	  # fixed() patterns, with and without ignore_case
	  expect_dplyr_equal(
	    input %>%
	      transmute(x = str_replace_all(x, fixed("o"), "u")) %>%
	      collect(),
	    strings
	  )
	  expect_dplyr_equal(
	    input %>%
	      transmute(x = str_replace(x, fixed("O"), "u")) %>%
	      collect(),
	    strings
	  )
	  expect_dplyr_equal(
	    input %>%
	      transmute(x = str_replace(x, fixed("O", ignore_case = TRUE), "u")) %>%
	      collect(),
	    strings
	  )
	})

	test_that("strsplit and str_split", {
	  phrases <- tibble(x = c("Foo and bar", "baz and qux and quux"))

	  # base::strsplit(): default arguments, then fixed = TRUE with a pattern
	  # containing regex metacharacters
	  expect_dplyr_equal(
	    input %>%
	      mutate(x = strsplit(x, "and")) %>%
	      collect(),
	    phrases
	  )
	  expect_dplyr_equal(
	    input %>%
	      mutate(x = strsplit(x, "and.*", fixed = TRUE)) %>%
	      collect(),
	    phrases
	  )

	  # stringr::str_split(): bare pattern, then n = 2 with a bare, fixed() and
	  # regex() pattern
	  expect_dplyr_equal(
	    input %>%
	      mutate(x = str_split(x, "and")) %>%
	      collect(),
	    phrases
	  )
	  expect_dplyr_equal(
	    input %>%
	      mutate(x = str_split(x, "and", n = 2)) %>%
	      collect(),
	    phrases
	  )
	  expect_dplyr_equal(
	    input %>%
	      mutate(x = str_split(x, fixed("and"), n = 2)) %>%
	      collect(),
	    phrases
	  )
	  expect_dplyr_equal(
	    input %>%
	      mutate(x = str_split(x, regex("and"), n = 2)) %>%
	      collect(),
	    phrases
	  )
	})

	test_that("arrow_*_split_whitespace functions", {
	  # input separated by ASCII whitespace characters only
	  ascii_tab <- Table$create(
	    tibble(x = c("Foo\nand bar", "baz\tand qux and quux"))
	  )

	  # input separated by non-ASCII whitespace characters only
	  utf8_tab <- Table$create(
	    tibble(x = c("Foo\u00A0and\u2000bar", "baz\u2006and\u1680qux\u3000and\u2008quux"))
	  )

	  # expected result of splitting either input with the default options
	  words <- tibble(x = list(c("Foo", "and", "bar"), c("baz", "and", "qux", "and", "quux")))

	  # default option values
	  expect_equivalent(
	    ascii_tab %>%
	      mutate(x = arrow_ascii_split_whitespace(x)) %>%
	      collect(),
	    words
	  )
	  expect_equivalent(
	    utf8_tab %>%
	      mutate(x = arrow_utf8_split_whitespace(x)) %>%
	      collect(),
	    words
	  )

	  # non-default option values
	  expect_equivalent(
	    ascii_tab %>%
	      mutate(
	        x = arrow_ascii_split_whitespace(x, options = list(max_splits = 1, reverse = TRUE))
	      ) %>%
	      collect(),
	    tibble(x = list(c("Foo\nand", "bar"), c("baz\tand qux and", "quux")))
	  )
	  expect_equivalent(
	    utf8_tab %>%
	      mutate(
	        x = arrow_utf8_split_whitespace(x, options = list(max_splits = 1, reverse = TRUE))
	      ) %>%
	      collect(),
	    tibble(x = list(c("Foo\u00A0and", "bar"), c("baz\u2006and\u1680qux\u3000and", "quux")))
	  )
	})

	test_that("errors and warnings in string splitting", {
	  df <- tibble(x = c("Foo and bar", "baz and qux and quux"))

	  # These conditions generate an error, but abandon_ship() catches the error,
	  # issues a warning, and pulls the data into R
	  expect_warning(
	    df %>%
	      Table$create() %>%
	      mutate(x = strsplit(x, "and.*", fixed = FALSE)) %>%
	      collect(),
	    regexp = "not supported"
	  )
	  # Pin the message here as well: without `regexp` any warning at all would
	  # satisfy this expectation, unlike its siblings.
	  expect_warning(
	    df %>%
	      Table$create() %>%
	      mutate(x = str_split(x, "and.?")) %>%
	      collect(),
	    regexp = "not supported"
	  )
	  expect_warning(
	    df %>%
	      Table$create() %>%
	      mutate(x = str_split(x, regex("and.?"), n = 2)) %>%
	      collect(),
	    regexp = "not supported"
	  )
	  expect_warning(
	    df %>%
	      Table$create() %>%
	      mutate(x = str_split(x, fixed("and", ignore_case = TRUE))) %>%
	      collect(),
	    "not supported"
	  )
	  expect_warning(
	    df %>%
	      Table$create() %>%
	      mutate(x = str_split(x, coll("and.?"))) %>%
	      collect(),
	    regexp = "not supported"
	  )
	  expect_warning(
	    df %>%
	      Table$create() %>%
	      mutate(x = str_split(x, boundary(type = "word"))) %>%
	      collect(),
	    regexp = "not supported"
	  )
	  expect_warning(
	    df %>%
	      Table$create() %>%
	      mutate(x = str_split(x, "and", n = 0)) %>%
	      collect(),
	    regexp = "not supported"
	  )

	  # This condition generates a warning
	  expect_warning(
	    df %>%
	      Table$create() %>%
	      mutate(x = str_split(x, fixed("and"), simplify = TRUE)) %>%
	      collect(),
	    "ignored"
	  )

	})

	test_that("errors and warnings in string detection and replacement", {
	  tab <- Table$create(tibble(x = c("Foo", "bar")))

	  # Both of these raise an error that abandon_ship() catches and turns into a
	  # warning before pulling the data into R
	  expect_warning(
	    tab %>%
	      filter(str_detect(x, boundary(type = "character"))) %>%
	      collect(),
	    regexp = "not implemented"
	  )
	  expect_warning(
	    tab %>%
	      mutate(x = str_replace_all(x, coll("o", locale = "en"), "ó")) %>%
	      collect(),
	    regexp = "not supported"
	  )

	  # This one produces a warning directly; note there is no collect() step
	  expect_warning(
	    tab %>%
	      transmute(x = str_replace_all(x, regex("o", multiline = TRUE), "u")),
	    regexp = "Ignoring pattern modifier argument not supported in Arrow: \"multiline\""
	  )
	})

	test_that("backreferences in pattern in string detection", {
	  # Unconditionally skipped; the reason is given in the skip message.
	  skip("RE2 does not support backreferences in pattern (https://github.com/google/re2/issues/101)")

	  strings <- tibble(x = c("Foo", "bar"))

	  # "\\1" refers back to the vowel captured by the first group
	  expect_dplyr_equal(
	    input %>%
	      filter(str_detect(x, regex("F([aeiou])\\1"))) %>%
	      collect(),
	    strings
	  )
	})

	test_that("backreferences (substitutions) in string replacement", {
	  df <- tibble(x = c("Foo", "bar"))

	  # base::sub() with two capture groups referenced in the replacement.
	  # The alternation is a plain "|": "\|" is not a valid escape sequence in
	  # an R string literal and stops the file from parsing.
	  expect_dplyr_equal(
	    input %>%
	      transmute(
	        desc = sub(
	          "(?:https?|ftp)://([^/\r\n]+)(/[^\r\n]*)?",
	          "path `\\2` on server `\\1`",
	          url
	        )
	      ) %>%
	      collect(),
	    tibble(url = "https://arrow.apache.org/docs/r/")
	  )

	  # stringr::str_replace() with a bare pattern and with regex()
	  expect_dplyr_equal(
	    input %>%
	      transmute(x = str_replace(x, "^(\\w)o(.*)", "\\1\\2p")) %>%
	      collect(),
	    df
	  )
	  expect_dplyr_equal(
	    input %>%
	      transmute(x = str_replace(x, regex("^(\\w)o(.*)", ignore_case = TRUE), "\\1\\2p")) %>%
	      collect(),
	    df
	  )
	  # Upper-case "O" in the pattern, so the match on "Foo" depends on
	  # ignore_case = TRUE (this expectation used to be an exact copy of the
	  # previous one, which matches even case-sensitively)
	  expect_dplyr_equal(
	    input %>%
	      transmute(x = str_replace(x, regex("^(\\w)O(.*)", ignore_case = TRUE), "\\1\\2p")) %>%
	      collect(),
	    df
	  )
	})

	test_that("edge cases in string detection and replacement", {
	  # For case-insensitive fixed match/replace, check that "\\E" in the search
	  # string and backslashes in the replacement string are taken literally.
	  # expect_dplyr_equal() is not used here because base::sub() and
	  # base::grepl() do not support ignore.case = TRUE when fixed = TRUE.
	  escapes <- tibble(x = c("\\Q\\e\\D"))
	  escapes_tab <- Table$create(escapes)

	  expect_equal(
	    escapes_tab %>%
	      filter(grepl("\\E", x, ignore.case = TRUE, fixed = TRUE)) %>%
	      collect(),
	    escapes
	  )
	  expect_equal(
	    escapes_tab %>%
	      transmute(x = sub("\\E", "\\L", x, ignore.case = TRUE, fixed = TRUE)) %>%
	      collect(),
	    tibble(x = c("\\Q\\L\\D"))
	  )

	  # A "(?i)" prefix supplied by the user must not break the "(?i)" prefix
	  # that is added for case-insensitive regex match/replace
	  upper_abc <- tibble(x = c("ABC"))

	  expect_dplyr_equal(
	    input %>%
	      filter(grepl("(?i)^[abc]{3}$", x, ignore.case = TRUE, fixed = FALSE)) %>%
	      collect(),
	    upper_abc
	  )
	  expect_dplyr_equal(
	    input %>%
	      transmute(x = sub("(?i)^[abc]{3}$", "123", x, ignore.case = TRUE, fixed = FALSE)) %>%
	      collect(),
	    upper_abc
	  )
	})