blob: 785095ef88e6690dc4c6f0cb4f5b294773fa1d0d [file]
# name: test/sql/function/string/test_regexp_substr.test
# description: REGEXP_SUBSTR test
# group: [string]
# Basic tests with one argument
query T
SELECT REGEXP_SUBSTR('abc123def', '[0-9]+')
----
123
query T
SELECT REGEXP_SUBSTR('hello world', '[a-z]+')
----
hello
query T
SELECT REGEXP_SUBSTR('HELLO', '[a-z]+')
----
NULL
query T
SELECT REGEXP_SUBSTR('', '[0-9]+')
----
NULL
# Tests with start position
query T
SELECT REGEXP_SUBSTR('abc123def456ghi', '[0-9]+', 4)
----
123
query T
SELECT REGEXP_SUBSTR('abc123def456ghi', '[0-9]+', 7)
----
456
query T
SELECT REGEXP_SUBSTR('abc123def456ghi', '[0-9]+', 10)
----
456
query T
SELECT REGEXP_SUBSTR('hello world', '[a-z]+', 7)
----
world
# Tests with occurrence
query T
SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 1, 1)
----
123
query T
SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 1, 2)
----
456
query T
SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 1, 3)
----
789
query T
SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 1, 4)
----
NULL
query T
SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 7, 1)
----
456
query T
SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 7, 2)
----
789
# Tests with complex regex patterns
query T
SELECT REGEXP_SUBSTR('user@example.com', '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
----
user@example.com
query T
SELECT REGEXP_SUBSTR('Contact: john@doe.com or jane@smith.com', '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', 1, 2)
----
jane@smith.com
query T
SELECT REGEXP_SUBSTR('price: $19.99, discount: $5.99', '\$\d+\.\d{2}')
----
$19.99
query T
SELECT REGEXP_SUBSTR('The date is 2023-12-25', '\d{4}-\d{2}-\d{2}')
----
2023-12-25
query T
SELECT REGEXP_SUBSTR('file123.txt, file456.doc', '[a-z]+\d+\.[a-z]+', 1, 2)
----
file456.doc
# Tests with special characters
query T
SELECT REGEXP_SUBSTR('a+b*c?d', '[+*?]')
----
+
query T
SELECT REGEXP_SUBSTR('(test) [text] {data}', '[\(\)\[\]\{\}]', 1, 3)
----
[
# Edge cases
query T
SELECT REGEXP_SUBSTR('aaaa', 'a+', 1, 1)
----
aaaa
query T
SELECT REGEXP_SUBSTR('aaaa', 'a+', 1, 2)
----
NULL
query T
SELECT REGEXP_SUBSTR('aaaa', 'a', 1, 4)
----
a
query T
SELECT REGEXP_SUBSTR('aaaa', 'a', 1, 5)
----
NULL
query T
SELECT REGEXP_SUBSTR('123-456-789', '\d+', 1, 3)
----
789
# Tests with NULL values
query T
SELECT REGEXP_SUBSTR(NULL, '[0-9]+')
----
NULL
query T
SELECT REGEXP_SUBSTR('abc123', NULL)
----
NULL
query T
SELECT REGEXP_SUBSTR('abc123', '[0-9]+', NULL)
----
NULL
query T
SELECT REGEXP_SUBSTR('abc123', '[0-9]+', 1, NULL)
----
NULL
# Tests with overlapping matches
query T
SELECT REGEXP_SUBSTR('aaaa', 'aa', 1, 1)
----
aa
query T
SELECT REGEXP_SUBSTR('aaaa', 'aa', 1, 2)
----
aa
# Tests with word boundaries
query T
SELECT REGEXP_SUBSTR('cat concat scat', '\bcat\b', 1, 1)
----
cat
query T
SELECT REGEXP_SUBSTR('cat concat scat', '\bcat\b', 1, 2)
----
NULL
# Tests with case sensitivity
query T
SELECT REGEXP_SUBSTR('ABCabc', '[a-z]+')
----
abc
query T
SELECT REGEXP_SUBSTR('ABCabc', '[A-Z]+')
----
ABC
# Error cases
statement error
SELECT REGEXP_SUBSTR('abc')
statement error
SELECT REGEXP_SUBSTR('abc', '[0-9]+', 'invalid')
statement error
SELECT REGEXP_SUBSTR('abc', '[0-9]+', 1, 'invalid')
statement error
SELECT REGEXP_SUBSTR('abc', '[0-9]+', 1, 1, 1)
statement error
SELECT REGEXP_SUBSTR('abc', '[a-z', 1)
# Performance test with large string
query T
SELECT REGEXP_SUBSTR(repeat('abc123', 1000), '[0-9]+', 1, 500)
----
123