| # name: test/sql/function/string/test_regexp_substr.test |
| # description: REGEXP_SUBSTR test |
| # group: [string] |
| |
| # Basic tests with one argument |
| query T |
| SELECT REGEXP_SUBSTR('abc123def', '[0-9]+') |
| ---- |
| 123 |
| |
| query T |
| SELECT REGEXP_SUBSTR('hello world', '[a-z]+') |
| ---- |
| hello |
| |
| query T |
| SELECT REGEXP_SUBSTR('HELLO', '[a-z]+') |
| ---- |
| NULL |
| |
| query T |
| SELECT REGEXP_SUBSTR('', '[0-9]+') |
| ---- |
| NULL |
| |
| # Tests with start position |
| query T |
| SELECT REGEXP_SUBSTR('abc123def456ghi', '[0-9]+', 4) |
| ---- |
| 123 |
| |
| query T |
| SELECT REGEXP_SUBSTR('abc123def456ghi', '[0-9]+', 7) |
| ---- |
| 456 |
| |
| query T |
| SELECT REGEXP_SUBSTR('abc123def456ghi', '[0-9]+', 10) |
| ---- |
| 456 |
| |
| query T |
| SELECT REGEXP_SUBSTR('hello world', '[a-z]+', 7) |
| ---- |
| world |
| |
| # Tests with occurrence |
| query T |
| SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 1, 1) |
| ---- |
| 123 |
| |
| query T |
| SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 1, 2) |
| ---- |
| 456 |
| |
| query T |
| SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 1, 3) |
| ---- |
| 789 |
| |
| query T |
| SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 1, 4) |
| ---- |
| NULL |
| |
| query T |
| SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 7, 1) |
| ---- |
| 456 |
| |
| query T |
| SELECT REGEXP_SUBSTR('abc123def456ghi789', '[0-9]+', 7, 2) |
| ---- |
| 789 |
| |
| # Tests with complex regex patterns |
| query T |
| SELECT REGEXP_SUBSTR('user@example.com', '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}') |
| ---- |
| user@example.com |
| |
| query T |
| SELECT REGEXP_SUBSTR('Contact: john@doe.com or jane@smith.com', '[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}', 1, 2) |
| ---- |
| jane@smith.com |
| |
| query T |
| SELECT REGEXP_SUBSTR('price: $19.99, discount: $5.99', '\$\d+\.\d{2}') |
| ---- |
| $19.99 |
| |
| query T |
| SELECT REGEXP_SUBSTR('The date is 2023-12-25', '\d{4}-\d{2}-\d{2}') |
| ---- |
| 2023-12-25 |
| |
| query T |
| SELECT REGEXP_SUBSTR('file123.txt, file456.doc', '[a-z]+\d+\.[a-z]+', 1, 2) |
| ---- |
| file456.doc |
| |
| # Tests with special characters |
| query T |
| SELECT REGEXP_SUBSTR('a+b*c?d', '[+*?]') |
| ---- |
| + |
| |
| query T |
| SELECT REGEXP_SUBSTR('(test) [text] {data}', '[\(\)\[\]\{\}]', 1, 3) |
| ---- |
| [ |
| |
| # Edge cases |
| query T |
| SELECT REGEXP_SUBSTR('aaaa', 'a+', 1, 1) |
| ---- |
| aaaa |
| |
| query T |
| SELECT REGEXP_SUBSTR('aaaa', 'a+', 1, 2) |
| ---- |
| NULL |
| |
| query T |
| SELECT REGEXP_SUBSTR('aaaa', 'a', 1, 4) |
| ---- |
| a |
| |
| query T |
| SELECT REGEXP_SUBSTR('aaaa', 'a', 1, 5) |
| ---- |
| NULL |
| |
| query T |
| SELECT REGEXP_SUBSTR('123-456-789', '\d+', 1, 3) |
| ---- |
| 789 |
| |
| # Tests with NULL values |
| query T |
| SELECT REGEXP_SUBSTR(NULL, '[0-9]+') |
| ---- |
| NULL |
| |
| query T |
| SELECT REGEXP_SUBSTR('abc123', NULL) |
| ---- |
| NULL |
| |
| query T |
| SELECT REGEXP_SUBSTR('abc123', '[0-9]+', NULL) |
| ---- |
| NULL |
| |
| query T |
| SELECT REGEXP_SUBSTR('abc123', '[0-9]+', 1, NULL) |
| ---- |
| NULL |
| |
| # Tests with overlapping matches |
| query T |
| SELECT REGEXP_SUBSTR('aaaa', 'aa', 1, 1) |
| ---- |
| aa |
| |
| query T |
| SELECT REGEXP_SUBSTR('aaaa', 'aa', 1, 2) |
| ---- |
| aa |
| |
| # Tests with word boundaries |
| query T |
| SELECT REGEXP_SUBSTR('cat concat scat', '\bcat\b', 1, 1) |
| ---- |
| cat |
| |
| query T |
| SELECT REGEXP_SUBSTR('cat concat scat', '\bcat\b', 1, 2) |
| ---- |
| NULL |
| |
| # Tests with case sensitivity |
| query T |
| SELECT REGEXP_SUBSTR('ABCabc', '[a-z]+') |
| ---- |
| abc |
| |
| query T |
| SELECT REGEXP_SUBSTR('ABCabc', '[A-Z]+') |
| ---- |
| ABC |
| |
| # Error cases |
| statement error |
| SELECT REGEXP_SUBSTR('abc') |
| |
| statement error |
| SELECT REGEXP_SUBSTR('abc', '[0-9]+', 'invalid') |
| |
| statement error |
| SELECT REGEXP_SUBSTR('abc', '[0-9]+', 1, 'invalid') |
| |
| statement error |
| SELECT REGEXP_SUBSTR('abc', '[0-9]+', 1, 1, 1) |
| |
| statement error |
| SELECT REGEXP_SUBSTR('abc', '[a-z', 1) |
| |
| # Performance test with large string |
| query T |
| SELECT REGEXP_SUBSTR(repeat('abc123', 1000), '[0-9]+', 1, 500) |
| ---- |
| 123 |