| /*------------------------------------------------------------------------- |
| * |
| * like.c |
| * like expression handling code. |
| * |
| * NOTES |
| * A big hack of the regexp.c code!! Contributed by |
| * Keith Parks <emkxp01@mtcc.demon.co.uk> (7/95). |
| * |
| * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * IDENTIFICATION |
| * src/backend/utils/adt/like.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| |
| #include <ctype.h> |
| |
| #include "catalog/pg_collation.h" |
| #include "mb/pg_wchar.h" |
| #include "miscadmin.h" |
| #include "utils/builtins.h" |
| #include "utils/pg_locale.h" |
| |
| |
| #define LIKE_TRUE 1 |
| #define LIKE_FALSE 0 |
| #define LIKE_ABORT (-1) |
| |
| |
| static int SB_MatchText(const char *t, int tlen, const char *p, int plen, |
| pg_locale_t locale, bool locale_is_c); |
| static text *SB_do_like_escape(text *, text *); |
| |
| static int MB_MatchText(const char *t, int tlen, const char *p, int plen, |
| pg_locale_t locale, bool locale_is_c); |
| static text *MB_do_like_escape(text *, text *); |
| |
| static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen, |
| pg_locale_t locale, bool locale_is_c); |
| |
| static int SB_IMatchText(const char *t, int tlen, const char *p, int plen, |
| pg_locale_t locale, bool locale_is_c); |
| |
| static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation); |
| static int Generic_Text_IC_like(text *str, text *pat, Oid collation); |
| |
| /*-------------------- |
| * Support routine for MatchText. Compares given multibyte streams |
| * as wide characters. If they match, returns 1 otherwise returns 0. |
| *-------------------- |
| */ |
| static inline int |
| wchareq(const char *p1, const char *p2) |
| { |
| int p1_len; |
| |
| /* Optimization: quickly compare the first byte. */ |
| if (*p1 != *p2) |
| return 0; |
| |
| p1_len = pg_mblen(p1); |
| if (pg_mblen(p2) != p1_len) |
| return 0; |
| |
| /* They are the same length */ |
| while (p1_len--) |
| { |
| if (*p1++ != *p2++) |
| return 0; |
| } |
| return 1; |
| } |
| |
| /* |
| * Formerly we had a routine iwchareq() here that tried to do case-insensitive |
| * comparison of multibyte characters. It did not work at all, however, |
| * because it relied on tolower() which has a single-byte API ... and |
| * towlower() wouldn't be much better since we have no suitably cheap way |
| * of getting a single character transformed to the system's wchar_t format. |
| * So now, we just downcase the strings using lower() and apply regular LIKE |
| * comparison. This should be revisited when we install better locale support. |
| */ |
| |
| /* |
| * We do handle case-insensitive matching for single-byte encodings using |
| * fold-on-the-fly processing, however. |
| */ |
| static char |
| SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c) |
| { |
| if (locale_is_c) |
| return pg_ascii_tolower(c); |
| #ifdef HAVE_LOCALE_T |
| else if (locale) |
| return tolower_l(c, locale->info.lt); |
| #endif |
| else |
| return pg_tolower(c); |
| } |
| |
| |
| #define NextByte(p, plen) ((p)++, (plen)--) |
| |
| /* Set up to compile like_match.c for multibyte characters */ |
| #define CHAREQ(p1, p2) wchareq((p1), (p2)) |
| #define NextChar(p, plen) \ |
| do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0) |
| #define CopyAdvChar(dst, src, srclen) \ |
| do { int __l = pg_mblen(src); \ |
| (srclen) -= __l; \ |
| while (__l-- > 0) \ |
| *(dst)++ = *(src)++; \ |
| } while (0) |
| |
| #define MatchText MB_MatchText |
| #define do_like_escape MB_do_like_escape |
| |
| #include "like_match.c" |
| |
| /* Set up to compile like_match.c for single-byte characters */ |
| #define CHAREQ(p1, p2) (*(p1) == *(p2)) |
| #define NextChar(p, plen) NextByte((p), (plen)) |
| #define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--) |
| |
| #define MatchText SB_MatchText |
| #define do_like_escape SB_do_like_escape |
| |
| #include "like_match.c" |
| |
| /* setup to compile like_match.c for single byte case insensitive matches */ |
| #define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c) |
| #define NextChar(p, plen) NextByte((p), (plen)) |
| #define MatchText SB_IMatchText |
| |
| #include "like_match.c" |
| |
| /* setup to compile like_match.c for UTF8 encoding, using fast NextChar */ |
| |
| #define NextChar(p, plen) \ |
| do { (p)++; (plen)--; } while ((plen) > 0 && (*(p) & 0xC0) == 0x80 ) |
| #define MatchText UTF8_MatchText |
| |
| #include "like_match.c" |
| |
| /* Generic for all cases not requiring inline case-folding */ |
| static inline int |
| GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation) |
| { |
| if (collation && !lc_ctype_is_c(collation) && collation != DEFAULT_COLLATION_OID) |
| { |
| pg_locale_t locale = pg_newlocale_from_collation(collation); |
| |
| if (locale && !locale->deterministic) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("nondeterministic collations are not supported for LIKE"))); |
| } |
| |
| if (pg_database_encoding_max_length() == 1) |
| return SB_MatchText(s, slen, p, plen, 0, true); |
| else if (GetDatabaseEncoding() == PG_UTF8) |
| return UTF8_MatchText(s, slen, p, plen, 0, true); |
| else |
| return MB_MatchText(s, slen, p, plen, 0, true); |
| } |
| |
| static inline int |
| Generic_Text_IC_like(text *str, text *pat, Oid collation) |
| { |
| char *s, |
| *p; |
| int slen, |
| plen; |
| pg_locale_t locale = 0; |
| bool locale_is_c = false; |
| |
| if (lc_ctype_is_c(collation)) |
| locale_is_c = true; |
| else if (collation != DEFAULT_COLLATION_OID) |
| { |
| if (!OidIsValid(collation)) |
| { |
| /* |
| * This typically means that the parser could not resolve a |
| * conflict of implicit collations, so report it that way. |
| */ |
| ereport(ERROR, |
| (errcode(ERRCODE_INDETERMINATE_COLLATION), |
| errmsg("could not determine which collation to use for ILIKE"), |
| errhint("Use the COLLATE clause to set the collation explicitly."))); |
| } |
| locale = pg_newlocale_from_collation(collation); |
| |
| if (locale && !locale->deterministic) |
| ereport(ERROR, |
| (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| errmsg("nondeterministic collations are not supported for ILIKE"))); |
| } |
| |
| /* |
| * For efficiency reasons, in the single byte case we don't call lower() |
| * on the pattern and text, but instead call SB_lower_char on each |
| * character. In the multi-byte case we don't have much choice :-(. Also, |
| * ICU does not support single-character case folding, so we go the long |
| * way. |
| */ |
| |
| if (pg_database_encoding_max_length() > 1 || (locale && locale->provider == COLLPROVIDER_ICU)) |
| { |
| pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation, |
| PointerGetDatum(pat))); |
| p = VARDATA_ANY(pat); |
| plen = VARSIZE_ANY_EXHDR(pat); |
| str = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation, |
| PointerGetDatum(str))); |
| s = VARDATA_ANY(str); |
| slen = VARSIZE_ANY_EXHDR(str); |
| if (GetDatabaseEncoding() == PG_UTF8) |
| return UTF8_MatchText(s, slen, p, plen, 0, true); |
| else |
| return MB_MatchText(s, slen, p, plen, 0, true); |
| } |
| else |
| { |
| p = VARDATA_ANY(pat); |
| plen = VARSIZE_ANY_EXHDR(pat); |
| s = VARDATA_ANY(str); |
| slen = VARSIZE_ANY_EXHDR(str); |
| return SB_IMatchText(s, slen, p, plen, locale, locale_is_c); |
| } |
| } |
| |
| /* |
| * interface routines called by the function manager |
| */ |
| |
| Datum |
| namelike(PG_FUNCTION_ARGS) |
| { |
| Name str = PG_GETARG_NAME(0); |
| text *pat = PG_GETARG_TEXT_PP(1); |
| bool result; |
| char *s, |
| *p; |
| int slen, |
| plen; |
| |
| s = NameStr(*str); |
| slen = strlen(s); |
| p = VARDATA_ANY(pat); |
| plen = VARSIZE_ANY_EXHDR(pat); |
| |
| result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE); |
| |
| PG_RETURN_BOOL(result); |
| } |
| |
| Datum |
| namenlike(PG_FUNCTION_ARGS) |
| { |
| Name str = PG_GETARG_NAME(0); |
| text *pat = PG_GETARG_TEXT_PP(1); |
| bool result; |
| char *s, |
| *p; |
| int slen, |
| plen; |
| |
| s = NameStr(*str); |
| slen = strlen(s); |
| p = VARDATA_ANY(pat); |
| plen = VARSIZE_ANY_EXHDR(pat); |
| |
| result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE); |
| |
| PG_RETURN_BOOL(result); |
| } |
| |
| Datum |
| textlike(PG_FUNCTION_ARGS) |
| { |
| text *str = PG_GETARG_TEXT_PP(0); |
| text *pat = PG_GETARG_TEXT_PP(1); |
| bool result; |
| char *s, |
| *p; |
| int slen, |
| plen; |
| |
| s = VARDATA_ANY(str); |
| slen = VARSIZE_ANY_EXHDR(str); |
| p = VARDATA_ANY(pat); |
| plen = VARSIZE_ANY_EXHDR(pat); |
| |
| result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) == LIKE_TRUE); |
| |
| PG_RETURN_BOOL(result); |
| } |
| |
| Datum |
| textnlike(PG_FUNCTION_ARGS) |
| { |
| text *str = PG_GETARG_TEXT_PP(0); |
| text *pat = PG_GETARG_TEXT_PP(1); |
| bool result; |
| char *s, |
| *p; |
| int slen, |
| plen; |
| |
| s = VARDATA_ANY(str); |
| slen = VARSIZE_ANY_EXHDR(str); |
| p = VARDATA_ANY(pat); |
| plen = VARSIZE_ANY_EXHDR(pat); |
| |
| result = (GenericMatchText(s, slen, p, plen, PG_GET_COLLATION()) != LIKE_TRUE); |
| |
| PG_RETURN_BOOL(result); |
| } |
| |
| Datum |
| bytealike(PG_FUNCTION_ARGS) |
| { |
| bytea *str = PG_GETARG_BYTEA_PP(0); |
| bytea *pat = PG_GETARG_BYTEA_PP(1); |
| bool result; |
| char *s, |
| *p; |
| int slen, |
| plen; |
| |
| s = VARDATA_ANY(str); |
| slen = VARSIZE_ANY_EXHDR(str); |
| p = VARDATA_ANY(pat); |
| plen = VARSIZE_ANY_EXHDR(pat); |
| |
| result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE); |
| |
| PG_RETURN_BOOL(result); |
| } |
| |
| Datum |
| byteanlike(PG_FUNCTION_ARGS) |
| { |
| bytea *str = PG_GETARG_BYTEA_PP(0); |
| bytea *pat = PG_GETARG_BYTEA_PP(1); |
| bool result; |
| char *s, |
| *p; |
| int slen, |
| plen; |
| |
| s = VARDATA_ANY(str); |
| slen = VARSIZE_ANY_EXHDR(str); |
| p = VARDATA_ANY(pat); |
| plen = VARSIZE_ANY_EXHDR(pat); |
| |
| result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE); |
| |
| PG_RETURN_BOOL(result); |
| } |
| |
| /* |
| * Case-insensitive versions |
| */ |
| |
| Datum |
| nameiclike(PG_FUNCTION_ARGS) |
| { |
| Name str = PG_GETARG_NAME(0); |
| text *pat = PG_GETARG_TEXT_PP(1); |
| bool result; |
| text *strtext; |
| |
| strtext = DatumGetTextPP(DirectFunctionCall1(name_text, |
| NameGetDatum(str))); |
| result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) == LIKE_TRUE); |
| |
| PG_RETURN_BOOL(result); |
| } |
| |
| Datum |
| nameicnlike(PG_FUNCTION_ARGS) |
| { |
| Name str = PG_GETARG_NAME(0); |
| text *pat = PG_GETARG_TEXT_PP(1); |
| bool result; |
| text *strtext; |
| |
| strtext = DatumGetTextPP(DirectFunctionCall1(name_text, |
| NameGetDatum(str))); |
| result = (Generic_Text_IC_like(strtext, pat, PG_GET_COLLATION()) != LIKE_TRUE); |
| |
| PG_RETURN_BOOL(result); |
| } |
| |
| Datum |
| texticlike(PG_FUNCTION_ARGS) |
| { |
| text *str = PG_GETARG_TEXT_PP(0); |
| text *pat = PG_GETARG_TEXT_PP(1); |
| bool result; |
| |
| result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) == LIKE_TRUE); |
| |
| PG_RETURN_BOOL(result); |
| } |
| |
| Datum |
| texticnlike(PG_FUNCTION_ARGS) |
| { |
| text *str = PG_GETARG_TEXT_PP(0); |
| text *pat = PG_GETARG_TEXT_PP(1); |
| bool result; |
| |
| result = (Generic_Text_IC_like(str, pat, PG_GET_COLLATION()) != LIKE_TRUE); |
| |
| PG_RETURN_BOOL(result); |
| } |
| |
| /* |
| * like_escape() --- given a pattern and an ESCAPE string, |
| * convert the pattern to use Postgres' standard backslash escape convention. |
| */ |
| Datum |
| like_escape(PG_FUNCTION_ARGS) |
| { |
| text *pat = PG_GETARG_TEXT_PP(0); |
| text *esc = PG_GETARG_TEXT_PP(1); |
| text *result; |
| |
| if (pg_database_encoding_max_length() == 1) |
| result = SB_do_like_escape(pat, esc); |
| else |
| result = MB_do_like_escape(pat, esc); |
| |
| PG_RETURN_TEXT_P(result); |
| } |
| |
| /* |
| * like_escape_bytea() --- given a pattern and an ESCAPE string, |
| * convert the pattern to use Postgres' standard backslash escape convention. |
| */ |
| Datum |
| like_escape_bytea(PG_FUNCTION_ARGS) |
| { |
| bytea *pat = PG_GETARG_BYTEA_PP(0); |
| bytea *esc = PG_GETARG_BYTEA_PP(1); |
| bytea *result = SB_do_like_escape((text *) pat, (text *) esc); |
| |
| PG_RETURN_BYTEA_P((bytea *) result); |
| } |