[CALCITE-6309] Add REGEXP_LIKE function (enabled in MySQL, Oracle, PostgreSQL and Spark libraries)
Supports a 3-argument REGEXP_LIKE variant that takes in a string of regex flags in
addition to the existing 2-argument variant.
Add support for REGEXP_LIKE(value, regex) to MySQL, Oracle, and PostgreSQL.
Add support for REGEXP_LIKE(value, regex, flags) to MySQL, Oracle, PostgreSQL, and Spark.
Fix RLIKE-related tests such that they validate results instead of
only checking that expressions parse correctly and return the correct
type.
diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index 37fdd4a..c622af2 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -601,7 +601,7 @@
defineReflective(PARSE_URL, BuiltInMethod.PARSE_URL2.method,
BuiltInMethod.PARSE_URL3.method);
defineReflective(REGEXP, BuiltInMethod.RLIKE.method);
- defineReflective(REGEXP_LIKE, BuiltInMethod.RLIKE.method);
+ defineReflective(REGEXP_LIKE, BuiltInMethod.RLIKE.method, BuiltInMethod.REGEXP_LIKE3.method);
defineReflective(REGEXP_CONTAINS, BuiltInMethod.REGEXP_CONTAINS.method);
defineReflective(REGEXP_EXTRACT, BuiltInMethod.REGEXP_EXTRACT2.method,
BuiltInMethod.REGEXP_EXTRACT3.method, BuiltInMethod.REGEXP_EXTRACT4.method);
diff --git a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
index 54a9571..523c2d0 100644
--- a/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
+++ b/core/src/main/java/org/apache/calcite/runtime/SqlFunctions.java
@@ -401,10 +401,16 @@
/** Validate regex arguments in REGEXP_* fns, throws an exception
* for invalid regex patterns, else returns a Pattern object. */
private Pattern validateRegexPattern(String regex, String methodName) {
+ return validateRegexPattern(regex, methodName, 0);
+ }
+
+ /** Validate regex arguments in REGEXP_* fns, throws an exception
+ * for invalid regex patterns, else returns a Pattern object. */
+ private Pattern validateRegexPattern(String regex, String methodName, int flags) {
try {
// Uses java.util.regex as a standard for regex processing
// in Calcite instead of RE2 used by BigQuery/GoogleSQL
- return cache.getUnchecked(new Key(0, regex));
+ return cache.getUnchecked(new Key(flags, regex));
} catch (UncheckedExecutionException e) {
if (e.getCause() instanceof PatternSyntaxException) {
throw RESOURCE.invalidRegexInputForRegexpFunctions(
@@ -486,6 +492,14 @@
return pattern.matcher(value).find();
}
+ /** SQL {@code REGEXP_LIKE(value, regexp, flags)} function.
+ * Throws a runtime exception for invalid regular expressions. */
+ @SuppressWarnings("unused")
+ public boolean regexpLike(String value, String regex, String stringFlags) {
+ final Pattern pattern =
+ validateRegexPattern(regex, "REGEXP_LIKE", makeRegexpFlags(stringFlags));
+ return pattern.matcher(value).find();
+ }
/** SQL {@code REGEXP_EXTRACT(value, regexp)} function.
* Returns NULL if there is no match. Returns an exception if regex is invalid.
* Uses position=1 and occurrence=1 as default values when not specified. */
@@ -680,8 +694,15 @@
flags |= Pattern.DOTALL;
break;
case 'm':
+ // PostgreSQL should actually interpret m to be a synonym for n, but this is
+ // relaxed for consistency.
flags |= Pattern.MULTILINE;
break;
+ case 's':
+ // This flag is in PostgreSQL but doesn't apply to other libraries. This is relaxed
+ // for consistency.
+ flags &= ~Pattern.DOTALL;
+ break;
default:
throw RESOURCE.invalidInputForRegexpReplace(stringFlags).ex();
}
diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
index 5f27b50..681671a 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
@@ -557,10 +557,10 @@
SqlFunctionCategory.STRING);
/** The "REGEXP_LIKE(value, regexp)" function, equivalent to {@link #RLIKE}. */
- @LibraryOperator(libraries = {SPARK})
+ @LibraryOperator(libraries = {SPARK, MYSQL, POSTGRESQL, ORACLE})
public static final SqlFunction REGEXP_LIKE =
SqlBasicFunction.create("REGEXP_LIKE", ReturnTypes.BOOLEAN_NULLABLE,
- OperandTypes.STRING_STRING,
+ OperandTypes.STRING_STRING_OPTIONAL_STRING,
SqlFunctionCategory.STRING);
@LibraryOperator(libraries = {MYSQL})
diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
index b25a292..53d4d33 100644
--- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
+++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
@@ -563,6 +563,8 @@
String.class, String.class, int.class, int.class),
REGEXP_INSTR5(SqlFunctions.RegexFunction.class, "regexpInstr",
String.class, String.class, int.class, int.class, int.class),
+ REGEXP_LIKE3(SqlFunctions.RegexFunction.class, "regexpLike",
+ String.class, String.class, String.class),
REGEXP_REPLACE3(SqlFunctions.RegexFunction.class, "regexpReplace",
String.class, String.class, String.class),
REGEXP_REPLACE4(SqlFunctions.RegexFunction.class, "regexpReplace",
diff --git a/site/_docs/reference.md b/site/_docs/reference.md
index 542b1f6..ea9b375 100644
--- a/site/_docs/reference.md
+++ b/site/_docs/reference.md
@@ -2821,7 +2821,7 @@
| b | REGEXP_EXTRACT(string, regexp [, position [, occurrence]]) | Returns the substring in *string* that matches the *regexp*, starting search at *position* (default 1), and until locating the nth *occurrence* (default 1). Returns NULL if there is no match
| b | REGEXP_EXTRACT_ALL(string, regexp) | Returns an array of all substrings in *string* that matches the *regexp*. Returns an empty array if there is no match
| b | REGEXP_INSTR(string, regexp [, position [, occurrence [, occurrence_position]]]) | Returns the lowest 1-based position of the substring in *string* that matches the *regexp*, starting search at *position* (default 1), and until locating the nth *occurrence* (default 1). Setting occurrence_position (default 0) to 1 returns the end position of substring + 1. Returns 0 if there is no match
-| s | REGEXP_LIKE(string, regexp) | Equivalent to `string1 RLIKE string2`
+| m o p s | REGEXP_LIKE(string, regexp [, flags]) | Equivalent to `string1 RLIKE string2` with an optional parameter for search flags. Supported flags are: <ul><li>i: case-insensitive matching</li><li>c: case-sensitive matching</li><li>n: newline-sensitive matching</li><li>s: non-newline-sensitive matching</li><li>m: multi-line</li></ul>
| b m o | REGEXP_REPLACE(string, regexp, rep [, pos [, occurrence [, matchType]]]) | Replaces all substrings of *string* that match *regexp* with *rep* at the starting *pos* in expr (if omitted, the default is 1), *occurrence* specifies which occurrence of a match to search for (if omitted, the default is 1), *matchType* specifies how to perform matching
| b | REGEXP_SUBSTR(string, regexp [, position [, occurrence]]) | Synonym for REGEXP_EXTRACT
| b m p s | REPEAT(string, integer) | Returns a string consisting of *string* repeated of *integer* times; returns an empty string if *integer* is less than 1
diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
index 9c5535e..9f3c10e 100644
--- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
+++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
@@ -3553,15 +3553,21 @@
}
@Test void testRlikeOperator() {
- final SqlOperatorFixture f = fixture().setFor(SqlLibraryOperators.RLIKE, VM_EXPAND);
+ final SqlOperatorFixture f = fixture()
+ .setFor(SqlLibraryOperators.RLIKE, VM_EXPAND);
checkRlikeFunc(f, SqlLibrary.HIVE, SqlLibraryOperators.RLIKE);
checkRlikeFunc(f, SqlLibrary.SPARK, SqlLibraryOperators.RLIKE);
checkRlikeFunc(f, SqlLibrary.SPARK, SqlLibraryOperators.REGEXP);
- checkRlikeFunc(f, SqlLibrary.SPARK, SqlLibraryOperators.REGEXP_LIKE);
checkNotRlikeFunc(f.withLibrary(SqlLibrary.HIVE));
checkNotRlikeFunc(f.withLibrary(SqlLibrary.SPARK));
checkRlikeFails(f.withLibrary(SqlLibrary.MYSQL));
checkRlikeFails(f.withLibrary(SqlLibrary.ORACLE));
+
+ f.setFor(SqlLibraryOperators.REGEXP_LIKE, VM_EXPAND);
+ checkRlikeFunc(f, SqlLibrary.SPARK, SqlLibraryOperators.REGEXP_LIKE);
+ checkRlikeFunc(f, SqlLibrary.POSTGRESQL, SqlLibraryOperators.REGEXP_LIKE);
+ checkRlikeFunc(f, SqlLibrary.MYSQL, SqlLibraryOperators.REGEXP_LIKE);
+ checkRlikeFunc(f, SqlLibrary.ORACLE, SqlLibraryOperators.REGEXP_LIKE);
}
void checkRlikeFunc(SqlOperatorFixture f0, SqlLibrary library, SqlOperator operator) {
@@ -3654,9 +3660,8 @@
}
@Test void testIlikeEscape() {
- final SqlOperatorFixture f =
- fixture().setFor(SqlLibraryOperators.ILIKE, VmName.EXPAND)
- .withLibrary(SqlLibrary.POSTGRESQL);
+ final SqlOperatorFixture f = fixture().setFor(SqlLibraryOperators.ILIKE, VmName.EXPAND)
+ .withLibrary(SqlLibrary.POSTGRESQL);
f.checkBoolean("'a_c' ilike 'a#_C' escape '#'", true);
f.checkBoolean("'axc' ilike 'a#_C' escape '#'", false);
f.checkBoolean("'a_c' ilike 'a\\_C' escape '\\'", true);
@@ -3743,6 +3748,34 @@
}
/** Test case for
+ * <a href="https://issues.apache.org/jira/browse/CALCITE-6309">[CALCITE-6309]
+ * Add REGEXP_LIKE function (enabled in MySQL, Oracle, PostgreSQL and Spark libraries)</a>. */
+ @Test void testRegexpLike3() {
+ final SqlOperatorFixture f = fixture();
+ f.setFor(SqlLibraryOperators.REGEXP_LIKE, VmName.EXPAND);
+
+ final Consumer<SqlOperatorFixture> consumer = f1 -> {
+ f1.checkBoolean("REGEXP_LIKE('teststr', 'TEST', 'i')", true);
+ f1.checkBoolean("REGEXP_LIKE('ateststr', 'TEST', 'c')", false);
+ f1.checkBoolean("REGEXP_LIKE('atest\nstr', 'test.str', '')", false);
+ f1.checkBoolean("REGEXP_LIKE('atest\nstr', 'test.str', 'n')", true);
+ f1.checkBoolean("REGEXP_LIKE('atest\nstr', 'TEST.str', 'in')", true);
+ f1.checkBoolean("REGEXP_LIKE('ateststring', 'teststr', '')", true);
+ f1.checkBoolean("REGEXP_LIKE('ateststring', 'TESTstr', 'ic')", false);
+ f1.checkBoolean("REGEXP_LIKE('ateststring', 'TESTstr', 'ci')", true);
+ f1.checkBoolean("REGEXP_LIKE('atest\nstr', 'test.str', 's')", false);
+ f1.checkBoolean("REGEXP_LIKE('atest\nstr', 'test.str', 'ns')", false);
+ f1.checkBoolean("REGEXP_LIKE('atest\nstr', 'test.str', 'sn')", true);
+ f1.checkNull("REGEXP_LIKE(NULL, 'test.str', 'sn')");
+ f1.checkNull("REGEXP_LIKE('atest\nstr', NULL, 'sn')");
+ f1.checkNull("REGEXP_LIKE('atest\nstr', 'test.str', NULL)");
+ };
+ f.forEachLibrary(
+ list(SqlLibrary.MYSQL, SqlLibrary.SPARK,
+ SqlLibrary.POSTGRESQL, SqlLibrary.ORACLE), consumer);
+ }
+
+ /** Test case for
* <a href="https://issues.apache.org/jira/browse/CALCITE-1898">[CALCITE-1898]
* LIKE must match '.' (period) literally</a>. */
@Test void testLikeDot() {