[CALCITE-6278] Add REGEXP, REGEXP_LIKE function (enabled in Spark library)
diff --git a/babel/src/test/java/org/apache/calcite/test/BabelQuidemTest.java b/babel/src/test/java/org/apache/calcite/test/BabelQuidemTest.java
index 69bdbaf..54058f6 100644
--- a/babel/src/test/java/org/apache/calcite/test/BabelQuidemTest.java
+++ b/babel/src/test/java/org/apache/calcite/test/BabelQuidemTest.java
@@ -131,6 +131,16 @@
                   SqlConformanceEnum.BABEL)
               .with(CalciteConnectionProperty.LENIENT_OPERATOR_LOOKUP, true)
               .connect();
+        case "scott-spark":
+          return CalciteAssert.that()
+              .with(CalciteAssert.SchemaSpec.SCOTT)
+              .with(CalciteConnectionProperty.FUN, "standard,spark")
+              .with(CalciteConnectionProperty.PARSER_FACTORY,
+                  BabelDdlExecutor.class.getName() + "#PARSER_FACTORY")
+              .with(CalciteConnectionProperty.CONFORMANCE,
+                  SqlConformanceEnum.BABEL)
+              .with(CalciteConnectionProperty.LENIENT_OPERATOR_LOOKUP, true)
+              .connect();
         default:
           return super.connect(name, reference);
         }
diff --git a/babel/src/test/resources/sql/spark.iq b/babel/src/test/resources/sql/spark.iq
new file mode 100644
index 0000000..a2ac6a7
--- /dev/null
+++ b/babel/src/test/resources/sql/spark.iq
@@ -0,0 +1,243 @@
+# spark.iq - Babel test for Spark dialect of SQL
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to you under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+!use scott-spark
+!set outputformat csv
+
+#####################################################################
+# String functions ######################
+
+#####################################################################
+# RLIKE
+#
+# string1 RLIKE string2
+# Returns true if str matches regexp, or false otherwise.
+#
+# Returns BOOLEAN
+
+select NULL RLIKE 'abc*';
+EXPR$0
+null
+!ok
+
+select 'abc' RLIKE NULL;
+EXPR$0
+null
+!ok
+
+select 'abc' RLIKE '';
+EXPR$0
+true
+!ok
+
+SELECT 'abc def ghi' RLIKE 'abc';
+EXPR$0
+true
+!ok
+
+SELECT 'abc def ghi' RLIKE 'abcd';
+EXPR$0
+false
+!ok
+
+select 'abc' RLIKE '^\abc$';
+EXPR$0
+false
+!ok
+
+select '\abc' RLIKE '\abc$';
+EXPR$0
+false
+!ok
+
+select '\abc' RLIKE '^\abc$';
+EXPR$0
+false
+!ok
+
+select '\abc' RLIKE '^\\abc$';
+EXPR$0
+true
+!ok
+
+select 'abc' RLIKE '^abc$';
+EXPR$0
+true
+!ok
+
+select 'abc' RLIKE 'abz*';
+EXPR$0
+true
+!ok
+
+SELECT '%SystemDrive%\\Users\\John' RLIKE '%SystemDrive%\\\\Users.*';
+EXPR$0
+true
+!ok
+
+select '%SystemDrive%\Users\John' RLIKE '%SystemDrive%\\Users.*';
+EXPR$0
+true
+!ok
+
+#####################################################################
+# REGEXP
+#
+# REGEXP(str, regexp)
+# Returns true if str matches regexp, or false otherwise.
+#
+# Returns BOOLEAN
+
+select REGEXP(NULL, 'abc*');
+EXPR$0
+null
+!ok
+
+select REGEXP('abc', NULL);
+EXPR$0
+null
+!ok
+
+select REGEXP('abc', '');
+EXPR$0
+true
+!ok
+
+SELECT REGEXP('abc def ghi', 'abc');
+EXPR$0
+true
+!ok
+
+SELECT REGEXP('abc def ghi', 'abcd');
+EXPR$0
+false
+!ok
+
+select REGEXP('abc', '^\abc$');
+EXPR$0
+false
+!ok
+
+select REGEXP('\abc', '\abc$');
+EXPR$0
+false
+!ok
+
+select REGEXP('\abc', '^\abc$');
+EXPR$0
+false
+!ok
+
+select REGEXP('\abc', '^\\abc$');
+EXPR$0
+true
+!ok
+
+select REGEXP('abc', '^abc$');
+EXPR$0
+true
+!ok
+
+select REGEXP('abc', 'abz*');
+EXPR$0
+true
+!ok
+
+SELECT REGEXP('%SystemDrive%\\Users\\John', '%SystemDrive%\\\\Users.*');
+EXPR$0
+true
+!ok
+
+SELECT REGEXP('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*');
+EXPR$0
+true
+!ok
+
+#####################################################################
+# REGEXP_LIKE
+#
+# REGEXP_LIKE(str, regexp)
+# Returns true if str matches regexp, or false otherwise.
+#
+# Returns BOOLEAN
+
+select REGEXP_LIKE(NULL, 'abc*');
+EXPR$0
+null
+!ok
+
+select REGEXP_LIKE('abc', NULL);
+EXPR$0
+null
+!ok
+
+select REGEXP_LIKE('abc', '');
+EXPR$0
+true
+!ok
+
+SELECT REGEXP_LIKE('abc def ghi', 'abc');
+EXPR$0
+true
+!ok
+
+SELECT REGEXP_LIKE('abc def ghi', 'abcd');
+EXPR$0
+false
+!ok
+
+select REGEXP_LIKE('abc', '^\abc$');
+EXPR$0
+false
+!ok
+
+select REGEXP_LIKE('\abc', '\abc$');
+EXPR$0
+false
+!ok
+
+select REGEXP_LIKE('\abc', '^\abc$');
+EXPR$0
+false
+!ok
+
+select REGEXP_LIKE('\abc', '^\\abc$');
+EXPR$0
+true
+!ok
+
+select REGEXP_LIKE('abc', '^abc$');
+EXPR$0
+true
+!ok
+
+select REGEXP_LIKE('abc', 'abz*');
+EXPR$0
+true
+!ok
+
+SELECT REGEXP_LIKE('%SystemDrive%\\Users\\John', '%SystemDrive%\\\\Users.*');
+EXPR$0
+true
+!ok
+
+SELECT REGEXP_LIKE('%SystemDrive%\Users\John', '%SystemDrive%\\Users.*');
+EXPR$0
+true
+!ok
+
+# End spark.iq
diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
index 0b9deb1..e37120e 100644
--- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
+++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java
@@ -232,10 +232,12 @@
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.PARSE_TIMESTAMP;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.PARSE_URL;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.POW;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_CONTAINS;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_EXTRACT;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_EXTRACT_ALL;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_INSTR;
+import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_LIKE;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.REGEXP_REPLACE;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.REPEAT;
 import static org.apache.calcite.sql.fun.SqlLibraryOperators.REVERSE;
@@ -597,6 +599,8 @@
       defineMethod(SPLIT, BuiltInMethod.SPLIT.method, NullPolicy.STRICT);
       defineReflective(PARSE_URL, BuiltInMethod.PARSE_URL2.method,
           BuiltInMethod.PARSE_URL3.method);
+      defineReflective(REGEXP, BuiltInMethod.RLIKE.method);
+      defineReflective(REGEXP_LIKE, BuiltInMethod.RLIKE.method);
       defineReflective(REGEXP_CONTAINS, BuiltInMethod.REGEXP_CONTAINS.method);
       defineReflective(REGEXP_EXTRACT, BuiltInMethod.REGEXP_EXTRACT2.method,
           BuiltInMethod.REGEXP_EXTRACT3.method, BuiltInMethod.REGEXP_EXTRACT4.method);
diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
index 003e66f..38b5b48 100644
--- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
+++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java
@@ -549,6 +549,20 @@
   @LibraryOperator(libraries = {BIG_QUERY})
   public static final SqlFunction REGEXP_SUBSTR = REGEXP_EXTRACT.withName("REGEXP_SUBSTR");
 
+  /** The "REGEXP(value, regexp)" function, equivalent to {@link #RLIKE}. */
+  @LibraryOperator(libraries = {SPARK})
+  public static final SqlFunction REGEXP =
+      SqlBasicFunction.create("REGEXP", ReturnTypes.BOOLEAN_NULLABLE,
+          OperandTypes.STRING_STRING,
+          SqlFunctionCategory.STRING);
+
+  /** The "REGEXP_LIKE(value, regexp)" function, equivalent to {@link #RLIKE}. */
+  @LibraryOperator(libraries = {SPARK})
+  public static final SqlFunction REGEXP_LIKE =
+      SqlBasicFunction.create("REGEXP_LIKE", ReturnTypes.BOOLEAN_NULLABLE,
+          OperandTypes.STRING_STRING,
+          SqlFunctionCategory.STRING);
+
   @LibraryOperator(libraries = {MYSQL})
   public static final SqlFunction COMPRESS =
       SqlBasicFunction.create("COMPRESS",
diff --git a/site/_docs/reference.md b/site/_docs/reference.md
index 08d20ac..21edb54 100644
--- a/site/_docs/reference.md
+++ b/site/_docs/reference.md
@@ -2813,10 +2813,12 @@
 | b | PARSE_TIMESTAMP(format, string[, timeZone])    | Uses format specified by *format* to convert *string* representation of timestamp to a TIMESTAMP WITH LOCAL TIME ZONE value in *timeZone*
 | h s | PARSE_URL(urlString, partToExtract [, keyToExtract] ) | Returns the specified *partToExtract* from the *urlString*. Valid values for *partToExtract* include HOST, PATH, QUERY, REF, PROTOCOL, AUTHORITY, FILE, and USERINFO. *keyToExtract* specifies which query to extract
 | b s | POW(numeric1, numeric2)                      | Returns *numeric1* raised to the power *numeric2*
+| s | REGEXP(string, regexp)                         | Equivalent to `string1 RLIKE string2`
 | b | REGEXP_CONTAINS(string, regexp)                | Returns whether *string* is a partial match for the *regexp*
 | b | REGEXP_EXTRACT(string, regexp [, position [, occurrence]]) | Returns the substring in *string* that matches the *regexp*, starting search at *position* (default 1), and until locating the nth *occurrence* (default 1). Returns NULL if there is no match
 | b | REGEXP_EXTRACT_ALL(string, regexp)             | Returns an array of all substrings in *string* that matches the *regexp*. Returns an empty array if there is no match
 | b | REGEXP_INSTR(string, regexp [, position [, occurrence [, occurrence_position]]]) | Returns the lowest 1-based position of the substring in *string* that matches the *regexp*, starting search at *position* (default 1), and until locating the nth *occurrence* (default 1). Setting occurrence_position (default 0) to 1 returns the end position of substring + 1. Returns 0 if there is no match
+| s | REGEXP_LIKE(string, regexp)                    | Equivalent to `string1 RLIKE string2`
 | b m o | REGEXP_REPLACE(string, regexp, rep [, pos [, occurrence [, matchType]]]) | Replaces all substrings of *string* that match *regexp* with *rep* at the starting *pos* in expr (if omitted, the default is 1), *occurrence* specifies which occurrence of a match to search for (if omitted, the default is 1), *matchType* specifies how to perform matching
 | b | REGEXP_SUBSTR(string, regexp [, position [, occurrence]]) | Synonym for REGEXP_EXTRACT
 | b m p s | REPEAT(string, integer)                  | Returns a string consisting of *string* repeated of *integer* times; returns an empty string if *integer* is less than 1
diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
index 8aaef82..ebf23f2 100644
--- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
+++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
@@ -3570,24 +3570,68 @@
   }
 
   @Test void testRlikeOperator() {
-    SqlOperatorFixture f = fixture()
-        .setFor(SqlLibraryOperators.RLIKE, VM_EXPAND);
-    checkRlike(f.withLibrary(SqlLibrary.SPARK));
-    checkRlike(f.withLibrary(SqlLibrary.HIVE));
+    final SqlOperatorFixture f = fixture().setFor(SqlLibraryOperators.RLIKE, VM_EXPAND);
+    checkRlikeFunc(f, SqlLibrary.HIVE, SqlLibraryOperators.RLIKE);
+    checkRlikeFunc(f, SqlLibrary.SPARK, SqlLibraryOperators.RLIKE);
+    checkRlikeFunc(f, SqlLibrary.SPARK, SqlLibraryOperators.REGEXP);
+    checkRlikeFunc(f, SqlLibrary.SPARK, SqlLibraryOperators.REGEXP_LIKE);
+    checkNotRlikeFunc(f.withLibrary(SqlLibrary.HIVE));
+    checkNotRlikeFunc(f.withLibrary(SqlLibrary.SPARK));
     checkRlikeFails(f.withLibrary(SqlLibrary.MYSQL));
     checkRlikeFails(f.withLibrary(SqlLibrary.ORACLE));
   }
 
-  static void checkRlike(SqlOperatorFixture f) {
-    f.checkBoolean("'Merrisa@gmail.com' rlike '.+@*\\.com'", true);
-    f.checkBoolean("'Merrisa@gmail.com' rlike '.com$'", true);
-    f.checkBoolean("'acbd' rlike '^ac+'", true);
-    f.checkBoolean("'acb' rlike 'acb|efg'", true);
-    f.checkBoolean("'acb|efg' rlike 'acb\\|efg'", true);
-    f.checkBoolean("'Acbd' rlike '^ac+'", false);
-    f.checkBoolean("'Merrisa@gmail.com' rlike 'Merrisa_'", false);
-    f.checkBoolean("'abcdef' rlike '%cd%'", false);
+  void checkRlikeFunc(SqlOperatorFixture f0, SqlLibrary library, SqlOperator operator) {
+    final Consumer<SqlOperatorFixture> consumer = f -> {
+      f.checkBoolean(binaryExpression(operator, "'Merrisa@gmail.com'", "'.+@*\\.com'"), true);
+      f.checkBoolean(binaryExpression(operator, "'Merrisa@gmail.com'", "'.com$'"), true);
+      f.checkBoolean(binaryExpression(operator, "'acbd'", "'^ac+'"), true);
+      f.checkBoolean(binaryExpression(operator, "'acb'", "'acb|efg'"), true);
+      f.checkBoolean(binaryExpression(operator, "'acb|efg'", "'acb\\|efg'"), true);
+      f.checkBoolean(binaryExpression(operator, "'Acbd'", "'^ac+'"), false);
+      f.checkBoolean(binaryExpression(operator, "'Merrisa@gmail.com'", "'Merrisa_'"), false);
+      f.checkBoolean(binaryExpression(operator, "'abcdef'", "'%cd%'"), false);
+      f.checkBoolean(binaryExpression(operator, "'abc def ghi'", "'abc'"), true);
+      f.checkBoolean(binaryExpression(operator, "'abc def ghi'", "'[a-z]+'"), true);
+      f.checkBoolean(
+          binaryExpression(operator, "'foo@bar.com'",
+          "'@[a-zA-Z0-9-]+\\.[a-zA-Z0-9.]+'"), true);
+      f.checkBoolean(
+          binaryExpression(operator, "'foo@.com'",
+          "'@[a-zA-Z0-9-]+\\.[a-zA-Z0-9.]+'"), false);
+      f.checkBoolean(binaryExpression(operator, "'5556664422'", "'^\\d{10}$'"), true);
+      f.checkBoolean(binaryExpression(operator, "'11555666442233'", "'^\\d{10}$'"), false);
+      f.checkBoolean(binaryExpression(operator, "'55566644221133'", "'\\d{10}'"), true);
+      f.checkBoolean(binaryExpression(operator, "'55as56664as422'", "'\\d{10}'"), false);
+      f.checkBoolean(binaryExpression(operator, "'55as56664as422'", "''"), true);
 
+      // test for string escaped
+      f.checkBoolean(binaryExpression(operator, "'abc'", "'^\\abc$'"), false);
+      f.checkBoolean(
+          binaryExpression(operator, "'%SystemDrive%\\Users\\John'",
+              "'%SystemDrive%\\\\Users.*'"), true);
+      f.checkBoolean(
+          binaryExpression(operator, "'%SystemDrive%\\\\Users\\\\John'",
+          "'%SystemDrive%\\\\\\\\Users.*'"), true);
+      f.checkFails(
+          binaryExpression(operator, "'%SystemDrive%\\Users\\John'",
+          "'%SystemDrive%\\Users.*'"),
+          "(?s).*Illegal/unsupported escape sequence near index.*", true);
+
+      f.checkQuery("select " + binaryExpression(operator, "'abc def ghi'", "'abc'"));
+      f.checkQuery(
+          "select " + binaryExpression(operator, "'foo@bar.com'",
+          "'@[a-zA-Z0-9-]+\\\\.[a-zA-Z0-9-.]+'"));
+      f.checkQuery("select " + binaryExpression(operator, "'55as56664as422'", "'\\d{10}'"));
+
+      f.checkNull(binaryExpression(operator, "'abc def ghi'", "cast(null as varchar)"));
+      f.checkNull(binaryExpression(operator, "cast(null as varchar)", "'abc'"));
+      f.checkNull(binaryExpression(operator, "cast(null as varchar)", "cast(null as varchar)"));
+    };
+    f0.forEachLibrary(list(library), consumer);
+  }
+
+  void checkNotRlikeFunc(SqlOperatorFixture f) {
     f.setFor(SqlLibraryOperators.NOT_RLIKE, VM_EXPAND);
     f.checkBoolean("'Merrisagmail' not rlike '.+@*\\.com'", true);
     f.checkBoolean("'acbd' not rlike '^ac+'", false);
@@ -3595,6 +3639,14 @@
     f.checkBoolean("'Merrisa@gmail.com' not rlike 'Merrisa_'", true);
   }
 
+  private String binaryExpression(SqlOperator operator, String left, String right) {
+    if (SqlLibraryOperators.RLIKE == operator || SqlLibraryOperators.NOT_RLIKE == operator) {
+      return left + " " + operator.getName() + " " + right;
+    } else {
+      return operator.getName() + "( " + left + ", " + right + ")";
+    }
+  }
+
   static void checkRlikeFails(SqlOperatorFixture f) {
     final String noRlike = "(?s).*No match found for function signature RLIKE";
     f.checkFails("^'Merrisa@gmail.com' rlike '.+@*\\.com'^", noRlike, false);