| /*------------------------------------------------------------------------- |
| * oracle_compat.c |
| * Oracle compatible functions. |
| * |
| * Copyright (c) 1996-2023, PostgreSQL Global Development Group |
| * |
| * Author: Edmund Mergl <E.Mergl@bawue.de> |
| * Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org> |
| * |
| * |
| * IDENTIFICATION |
| * src/backend/utils/adt/oracle_compat.c |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| |
| #include "common/int.h" |
| #include "mb/pg_wchar.h" |
| #include "miscadmin.h" |
| #include "utils/builtins.h" |
| #include "utils/formatting.h" |
| #include "utils/memutils.h" |
| #include "varatt.h" |
| |
| |
| static text *dotrim(const char *string, int stringlen, |
| const char *set, int setlen, |
| bool doltrim, bool dortrim); |
| static bytea *dobyteatrim(bytea *string, bytea *set, |
| bool doltrim, bool dortrim); |
| |
| |
| /******************************************************************** |
| * |
| * lower |
| * |
| * Syntax: |
| * |
| * text lower(text string) |
| * |
| * Purpose: |
| * |
| * Returns string, with all letters forced to lowercase. |
| * |
| ********************************************************************/ |
| |
| Datum |
| lower(PG_FUNCTION_ARGS) |
| { |
| text *in_string = PG_GETARG_TEXT_PP(0); |
| char *out_string; |
| text *result; |
| |
| out_string = str_tolower(VARDATA_ANY(in_string), |
| VARSIZE_ANY_EXHDR(in_string), |
| PG_GET_COLLATION()); |
| result = cstring_to_text(out_string); |
| pfree(out_string); |
| |
| PG_RETURN_TEXT_P(result); |
| } |
| |
| |
| /******************************************************************** |
| * |
| * upper |
| * |
| * Syntax: |
| * |
| * text upper(text string) |
| * |
| * Purpose: |
| * |
| * Returns string, with all letters forced to uppercase. |
| * |
| ********************************************************************/ |
| |
| Datum |
| upper(PG_FUNCTION_ARGS) |
| { |
| text *in_string = PG_GETARG_TEXT_PP(0); |
| char *out_string; |
| text *result; |
| |
| out_string = str_toupper(VARDATA_ANY(in_string), |
| VARSIZE_ANY_EXHDR(in_string), |
| PG_GET_COLLATION()); |
| result = cstring_to_text(out_string); |
| pfree(out_string); |
| |
| PG_RETURN_TEXT_P(result); |
| } |
| |
| |
| /******************************************************************** |
| * |
| * initcap |
| * |
| * Syntax: |
| * |
| * text initcap(text string) |
| * |
| * Purpose: |
| * |
| * Returns string, with first letter of each word in uppercase, all |
| * other letters in lowercase. A word is defined as a sequence of |
| * alphanumeric characters, delimited by non-alphanumeric |
| * characters. |
| * |
| ********************************************************************/ |
| |
| Datum |
| initcap(PG_FUNCTION_ARGS) |
| { |
| text *in_string = PG_GETARG_TEXT_PP(0); |
| char *out_string; |
| text *result; |
| |
| out_string = str_initcap(VARDATA_ANY(in_string), |
| VARSIZE_ANY_EXHDR(in_string), |
| PG_GET_COLLATION()); |
| result = cstring_to_text(out_string); |
| pfree(out_string); |
| |
| PG_RETURN_TEXT_P(result); |
| } |
| |
| |
| /******************************************************************** |
| * |
| * lpad |
| * |
| * Syntax: |
| * |
| * text lpad(text string1, int4 len, text string2) |
| * |
| * Purpose: |
| * |
| * Returns string1, left-padded to length len with the sequence of |
| * characters in string2. If len is less than the length of string1, |
| * instead truncate (on the right) to len. |
| * |
| ********************************************************************/ |
| |
| Datum |
| lpad(PG_FUNCTION_ARGS) |
| { |
| text *string1 = PG_GETARG_TEXT_PP(0); |
| int32 len = PG_GETARG_INT32(1); |
| text *string2 = PG_GETARG_TEXT_PP(2); |
| text *ret; |
| char *ptr1, |
| *ptr2, |
| *ptr2start, |
| *ptr2end, |
| *ptr_ret; |
| int m, |
| s1len, |
| s2len; |
| int bytelen; |
| |
| /* Negative len is silently taken as zero */ |
| if (len < 0) |
| len = 0; |
| |
| s1len = VARSIZE_ANY_EXHDR(string1); |
| if (s1len < 0) |
| s1len = 0; /* shouldn't happen */ |
| |
| s2len = VARSIZE_ANY_EXHDR(string2); |
| if (s2len < 0) |
| s2len = 0; /* shouldn't happen */ |
| |
| s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len); |
| |
| if (s1len > len) |
| s1len = len; /* truncate string1 to len chars */ |
| |
| if (s2len <= 0) |
| len = s1len; /* nothing to pad with, so don't pad */ |
| |
| /* compute worst-case output length */ |
| if (unlikely(pg_mul_s32_overflow(pg_database_encoding_max_length(), len, |
| &bytelen)) || |
| unlikely(pg_add_s32_overflow(bytelen, VARHDRSZ, &bytelen)) || |
| unlikely(!AllocSizeIsValid(bytelen))) |
| ereport(ERROR, |
| (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
| errmsg("requested length too large"))); |
| |
| ret = (text *) palloc(bytelen); |
| |
| m = len - s1len; |
| |
| ptr2 = ptr2start = VARDATA_ANY(string2); |
| ptr2end = ptr2 + s2len; |
| ptr_ret = VARDATA(ret); |
| |
| while (m--) |
| { |
| int mlen = pg_mblen(ptr2); |
| |
| memcpy(ptr_ret, ptr2, mlen); |
| ptr_ret += mlen; |
| ptr2 += mlen; |
| if (ptr2 == ptr2end) /* wrap around at end of s2 */ |
| ptr2 = ptr2start; |
| } |
| |
| ptr1 = VARDATA_ANY(string1); |
| |
| while (s1len--) |
| { |
| int mlen = pg_mblen(ptr1); |
| |
| memcpy(ptr_ret, ptr1, mlen); |
| ptr_ret += mlen; |
| ptr1 += mlen; |
| } |
| |
| SET_VARSIZE(ret, ptr_ret - (char *) ret); |
| |
| PG_RETURN_TEXT_P(ret); |
| } |
| |
| |
| /******************************************************************** |
| * |
| * rpad |
| * |
| * Syntax: |
| * |
| * text rpad(text string1, int4 len, text string2) |
| * |
| * Purpose: |
| * |
| * Returns string1, right-padded to length len with the sequence of |
| * characters in string2. If len is less than the length of string1, |
| * instead truncate (on the right) to len. |
| * |
| ********************************************************************/ |
| |
| Datum |
| rpad(PG_FUNCTION_ARGS) |
| { |
| text *string1 = PG_GETARG_TEXT_PP(0); |
| int32 len = PG_GETARG_INT32(1); |
| text *string2 = PG_GETARG_TEXT_PP(2); |
| text *ret; |
| char *ptr1, |
| *ptr2, |
| *ptr2start, |
| *ptr2end, |
| *ptr_ret; |
| int m, |
| s1len, |
| s2len; |
| int bytelen; |
| |
| /* Negative len is silently taken as zero */ |
| if (len < 0) |
| len = 0; |
| |
| s1len = VARSIZE_ANY_EXHDR(string1); |
| if (s1len < 0) |
| s1len = 0; /* shouldn't happen */ |
| |
| s2len = VARSIZE_ANY_EXHDR(string2); |
| if (s2len < 0) |
| s2len = 0; /* shouldn't happen */ |
| |
| s1len = pg_mbstrlen_with_len(VARDATA_ANY(string1), s1len); |
| |
| if (s1len > len) |
| s1len = len; /* truncate string1 to len chars */ |
| |
| if (s2len <= 0) |
| len = s1len; /* nothing to pad with, so don't pad */ |
| |
| /* compute worst-case output length */ |
| if (unlikely(pg_mul_s32_overflow(pg_database_encoding_max_length(), len, |
| &bytelen)) || |
| unlikely(pg_add_s32_overflow(bytelen, VARHDRSZ, &bytelen)) || |
| unlikely(!AllocSizeIsValid(bytelen))) |
| ereport(ERROR, |
| (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
| errmsg("requested length too large"))); |
| |
| ret = (text *) palloc(bytelen); |
| |
| m = len - s1len; |
| |
| ptr1 = VARDATA_ANY(string1); |
| ptr_ret = VARDATA(ret); |
| |
| while (s1len--) |
| { |
| int mlen = pg_mblen(ptr1); |
| |
| memcpy(ptr_ret, ptr1, mlen); |
| ptr_ret += mlen; |
| ptr1 += mlen; |
| } |
| |
| ptr2 = ptr2start = VARDATA_ANY(string2); |
| ptr2end = ptr2 + s2len; |
| |
| while (m--) |
| { |
| int mlen = pg_mblen(ptr2); |
| |
| memcpy(ptr_ret, ptr2, mlen); |
| ptr_ret += mlen; |
| ptr2 += mlen; |
| if (ptr2 == ptr2end) /* wrap around at end of s2 */ |
| ptr2 = ptr2start; |
| } |
| |
| SET_VARSIZE(ret, ptr_ret - (char *) ret); |
| |
| PG_RETURN_TEXT_P(ret); |
| } |
| |
| |
| /******************************************************************** |
| * |
| * btrim |
| * |
| * Syntax: |
| * |
| * text btrim(text string, text set) |
| * |
| * Purpose: |
| * |
| * Returns string with characters removed from the front and back |
| * up to the first character not in set. |
| * |
| ********************************************************************/ |
| |
| Datum |
| btrim(PG_FUNCTION_ARGS) |
| { |
| text *string = PG_GETARG_TEXT_PP(0); |
| text *set = PG_GETARG_TEXT_PP(1); |
| text *ret; |
| |
| ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), |
| VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set), |
| true, true); |
| |
| PG_RETURN_TEXT_P(ret); |
| } |
| |
| /******************************************************************** |
| * |
| * btrim1 --- btrim with set fixed as ' ' |
| * |
| ********************************************************************/ |
| |
| Datum |
| btrim1(PG_FUNCTION_ARGS) |
| { |
| text *string = PG_GETARG_TEXT_PP(0); |
| text *ret; |
| |
| ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), |
| " ", 1, |
| true, true); |
| |
| PG_RETURN_TEXT_P(ret); |
| } |
| |
| /* |
| * Common implementation for btrim, ltrim, rtrim |
| */ |
| static text * |
| dotrim(const char *string, int stringlen, |
| const char *set, int setlen, |
| bool doltrim, bool dortrim) |
| { |
| int i; |
| |
| /* Nothing to do if either string or set is empty */ |
| if (stringlen > 0 && setlen > 0) |
| { |
| if (pg_database_encoding_max_length() > 1) |
| { |
| /* |
| * In the multibyte-encoding case, build arrays of pointers to |
| * character starts, so that we can avoid inefficient checks in |
| * the inner loops. |
| */ |
| const char **stringchars; |
| const char **setchars; |
| int *stringmblen; |
| int *setmblen; |
| int stringnchars; |
| int setnchars; |
| int resultndx; |
| int resultnchars; |
| const char *p; |
| int len; |
| int mblen; |
| const char *str_pos; |
| int str_len; |
| |
| stringchars = (const char **) palloc(stringlen * sizeof(char *)); |
| stringmblen = (int *) palloc(stringlen * sizeof(int)); |
| stringnchars = 0; |
| p = string; |
| len = stringlen; |
| while (len > 0) |
| { |
| stringchars[stringnchars] = p; |
| stringmblen[stringnchars] = mblen = pg_mblen(p); |
| stringnchars++; |
| p += mblen; |
| len -= mblen; |
| } |
| |
| setchars = (const char **) palloc(setlen * sizeof(char *)); |
| setmblen = (int *) palloc(setlen * sizeof(int)); |
| setnchars = 0; |
| p = set; |
| len = setlen; |
| while (len > 0) |
| { |
| setchars[setnchars] = p; |
| setmblen[setnchars] = mblen = pg_mblen(p); |
| setnchars++; |
| p += mblen; |
| len -= mblen; |
| } |
| |
| resultndx = 0; /* index in stringchars[] */ |
| resultnchars = stringnchars; |
| |
| if (doltrim) |
| { |
| while (resultnchars > 0) |
| { |
| str_pos = stringchars[resultndx]; |
| str_len = stringmblen[resultndx]; |
| for (i = 0; i < setnchars; i++) |
| { |
| if (str_len == setmblen[i] && |
| memcmp(str_pos, setchars[i], str_len) == 0) |
| break; |
| } |
| if (i >= setnchars) |
| break; /* no match here */ |
| string += str_len; |
| stringlen -= str_len; |
| resultndx++; |
| resultnchars--; |
| } |
| } |
| |
| if (dortrim) |
| { |
| while (resultnchars > 0) |
| { |
| str_pos = stringchars[resultndx + resultnchars - 1]; |
| str_len = stringmblen[resultndx + resultnchars - 1]; |
| for (i = 0; i < setnchars; i++) |
| { |
| if (str_len == setmblen[i] && |
| memcmp(str_pos, setchars[i], str_len) == 0) |
| break; |
| } |
| if (i >= setnchars) |
| break; /* no match here */ |
| stringlen -= str_len; |
| resultnchars--; |
| } |
| } |
| |
| pfree(stringchars); |
| pfree(stringmblen); |
| pfree(setchars); |
| pfree(setmblen); |
| } |
| else |
| { |
| /* |
| * In the single-byte-encoding case, we don't need such overhead. |
| */ |
| if (doltrim) |
| { |
| while (stringlen > 0) |
| { |
| char str_ch = *string; |
| |
| for (i = 0; i < setlen; i++) |
| { |
| if (str_ch == set[i]) |
| break; |
| } |
| if (i >= setlen) |
| break; /* no match here */ |
| string++; |
| stringlen--; |
| } |
| } |
| |
| if (dortrim) |
| { |
| while (stringlen > 0) |
| { |
| char str_ch = string[stringlen - 1]; |
| |
| for (i = 0; i < setlen; i++) |
| { |
| if (str_ch == set[i]) |
| break; |
| } |
| if (i >= setlen) |
| break; /* no match here */ |
| stringlen--; |
| } |
| } |
| } |
| } |
| |
| /* Return selected portion of string */ |
| return cstring_to_text_with_len(string, stringlen); |
| } |
| |
| /* |
| * Common implementation for bytea versions of btrim, ltrim, rtrim |
| */ |
| bytea * |
| dobyteatrim(bytea *string, bytea *set, bool doltrim, bool dortrim) |
| { |
| bytea *ret; |
| char *ptr, |
| *end, |
| *ptr2, |
| *ptr2start, |
| *end2; |
| int m, |
| stringlen, |
| setlen; |
| |
| stringlen = VARSIZE_ANY_EXHDR(string); |
| setlen = VARSIZE_ANY_EXHDR(set); |
| |
| if (stringlen <= 0 || setlen <= 0) |
| return string; |
| |
| m = stringlen; |
| ptr = VARDATA_ANY(string); |
| end = ptr + stringlen - 1; |
| ptr2start = VARDATA_ANY(set); |
| end2 = ptr2start + setlen - 1; |
| |
| if (doltrim) |
| { |
| while (m > 0) |
| { |
| ptr2 = ptr2start; |
| while (ptr2 <= end2) |
| { |
| if (*ptr == *ptr2) |
| break; |
| ++ptr2; |
| } |
| if (ptr2 > end2) |
| break; |
| ptr++; |
| m--; |
| } |
| } |
| |
| if (dortrim) |
| { |
| while (m > 0) |
| { |
| ptr2 = ptr2start; |
| while (ptr2 <= end2) |
| { |
| if (*end == *ptr2) |
| break; |
| ++ptr2; |
| } |
| if (ptr2 > end2) |
| break; |
| end--; |
| m--; |
| } |
| } |
| |
| ret = (bytea *) palloc(VARHDRSZ + m); |
| SET_VARSIZE(ret, VARHDRSZ + m); |
| memcpy(VARDATA(ret), ptr, m); |
| return ret; |
| } |
| |
| /******************************************************************** |
| * |
| * byteatrim |
| * |
| * Syntax: |
| * |
| * bytea byteatrim(bytea string, bytea set) |
| * |
| * Purpose: |
| * |
| * Returns string with characters removed from the front and back |
| * up to the first character not in set. |
| * |
| * Cloned from btrim and modified as required. |
| ********************************************************************/ |
| |
| Datum |
| byteatrim(PG_FUNCTION_ARGS) |
| { |
| bytea *string = PG_GETARG_BYTEA_PP(0); |
| bytea *set = PG_GETARG_BYTEA_PP(1); |
| bytea *ret; |
| |
| ret = dobyteatrim(string, set, true, true); |
| |
| PG_RETURN_BYTEA_P(ret); |
| } |
| |
| /******************************************************************** |
| * |
| * bytealtrim |
| * |
| * Syntax: |
| * |
| * bytea bytealtrim(bytea string, bytea set) |
| * |
| * Purpose: |
| * |
| * Returns string with initial characters removed up to the first |
| * character not in set. |
| * |
| ********************************************************************/ |
| |
| Datum |
| bytealtrim(PG_FUNCTION_ARGS) |
| { |
| bytea *string = PG_GETARG_BYTEA_PP(0); |
| bytea *set = PG_GETARG_BYTEA_PP(1); |
| bytea *ret; |
| |
| ret = dobyteatrim(string, set, true, false); |
| |
| PG_RETURN_BYTEA_P(ret); |
| } |
| |
| /******************************************************************** |
| * |
| * byteartrim |
| * |
| * Syntax: |
| * |
| * bytea byteartrim(bytea string, bytea set) |
| * |
| * Purpose: |
| * |
| * Returns string with final characters removed after the last |
| * character not in set. |
| * |
| ********************************************************************/ |
| |
| Datum |
| byteartrim(PG_FUNCTION_ARGS) |
| { |
| bytea *string = PG_GETARG_BYTEA_PP(0); |
| bytea *set = PG_GETARG_BYTEA_PP(1); |
| bytea *ret; |
| |
| ret = dobyteatrim(string, set, false, true); |
| |
| PG_RETURN_BYTEA_P(ret); |
| } |
| |
| /******************************************************************** |
| * |
| * ltrim |
| * |
| * Syntax: |
| * |
| * text ltrim(text string, text set) |
| * |
| * Purpose: |
| * |
| * Returns string with initial characters removed up to the first |
| * character not in set. |
| * |
| ********************************************************************/ |
| |
| Datum |
| ltrim(PG_FUNCTION_ARGS) |
| { |
| text *string = PG_GETARG_TEXT_PP(0); |
| text *set = PG_GETARG_TEXT_PP(1); |
| text *ret; |
| |
| ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), |
| VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set), |
| true, false); |
| |
| PG_RETURN_TEXT_P(ret); |
| } |
| |
| /******************************************************************** |
| * |
| * ltrim1 --- ltrim with set fixed as ' ' |
| * |
| ********************************************************************/ |
| |
| Datum |
| ltrim1(PG_FUNCTION_ARGS) |
| { |
| text *string = PG_GETARG_TEXT_PP(0); |
| text *ret; |
| |
| ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), |
| " ", 1, |
| true, false); |
| |
| PG_RETURN_TEXT_P(ret); |
| } |
| |
| /******************************************************************** |
| * |
| * rtrim |
| * |
| * Syntax: |
| * |
| * text rtrim(text string, text set) |
| * |
| * Purpose: |
| * |
| * Returns string with final characters removed after the last |
| * character not in set. |
| * |
| ********************************************************************/ |
| |
| Datum |
| rtrim(PG_FUNCTION_ARGS) |
| { |
| text *string = PG_GETARG_TEXT_PP(0); |
| text *set = PG_GETARG_TEXT_PP(1); |
| text *ret; |
| |
| ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), |
| VARDATA_ANY(set), VARSIZE_ANY_EXHDR(set), |
| false, true); |
| |
| PG_RETURN_TEXT_P(ret); |
| } |
| |
| /******************************************************************** |
| * |
| * rtrim1 --- rtrim with set fixed as ' ' |
| * |
| ********************************************************************/ |
| |
| Datum |
| rtrim1(PG_FUNCTION_ARGS) |
| { |
| text *string = PG_GETARG_TEXT_PP(0); |
| text *ret; |
| |
| ret = dotrim(VARDATA_ANY(string), VARSIZE_ANY_EXHDR(string), |
| " ", 1, |
| false, true); |
| |
| PG_RETURN_TEXT_P(ret); |
| } |
| |
| |
| /******************************************************************** |
| * |
| * translate |
| * |
| * Syntax: |
| * |
| * text translate(text string, text from, text to) |
| * |
| * Purpose: |
| * |
| * Returns string after replacing all occurrences of characters in from |
| * with the corresponding character in to. If from is longer than to, |
| * occurrences of the extra characters in from are deleted. |
| * Improved by Edwin Ramirez <ramirez@doc.mssm.edu>. |
| * |
| ********************************************************************/ |
| |
| Datum |
| translate(PG_FUNCTION_ARGS) |
| { |
| text *string = PG_GETARG_TEXT_PP(0); |
| text *from = PG_GETARG_TEXT_PP(1); |
| text *to = PG_GETARG_TEXT_PP(2); |
| text *result; |
| char *from_ptr, |
| *to_ptr, |
| *to_end; |
| char *source, |
| *target; |
| int m, |
| fromlen, |
| tolen, |
| retlen, |
| i; |
| int bytelen; |
| int len; |
| int source_len; |
| int from_index; |
| |
| m = VARSIZE_ANY_EXHDR(string); |
| if (m <= 0) |
| PG_RETURN_TEXT_P(string); |
| source = VARDATA_ANY(string); |
| |
| fromlen = VARSIZE_ANY_EXHDR(from); |
| from_ptr = VARDATA_ANY(from); |
| tolen = VARSIZE_ANY_EXHDR(to); |
| to_ptr = VARDATA_ANY(to); |
| to_end = to_ptr + tolen; |
| |
| /* |
| * The worst-case expansion is to substitute a max-length character for a |
| * single-byte character at each position of the string. |
| */ |
| if (unlikely(pg_mul_s32_overflow(pg_database_encoding_max_length(), m, |
| &bytelen)) || |
| unlikely(pg_add_s32_overflow(bytelen, VARHDRSZ, &bytelen)) || |
| unlikely(!AllocSizeIsValid(bytelen))) |
| ereport(ERROR, |
| (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
| errmsg("requested length too large"))); |
| |
| result = (text *) palloc(bytelen); |
| |
| target = VARDATA(result); |
| retlen = 0; |
| |
| while (m > 0) |
| { |
| source_len = pg_mblen(source); |
| from_index = 0; |
| |
| for (i = 0; i < fromlen; i += len) |
| { |
| len = pg_mblen(&from_ptr[i]); |
| if (len == source_len && |
| memcmp(source, &from_ptr[i], len) == 0) |
| break; |
| |
| from_index++; |
| } |
| if (i < fromlen) |
| { |
| /* substitute, or delete if no corresponding "to" character */ |
| char *p = to_ptr; |
| |
| for (i = 0; i < from_index; i++) |
| { |
| if (p >= to_end) |
| break; |
| p += pg_mblen(p); |
| } |
| if (p < to_end) |
| { |
| len = pg_mblen(p); |
| memcpy(target, p, len); |
| target += len; |
| retlen += len; |
| } |
| } |
| else |
| { |
| /* no match, so copy */ |
| memcpy(target, source, source_len); |
| target += source_len; |
| retlen += source_len; |
| } |
| |
| source += source_len; |
| m -= source_len; |
| } |
| |
| SET_VARSIZE(result, retlen + VARHDRSZ); |
| |
| /* |
| * The function result is probably much bigger than needed, if we're using |
| * a multibyte encoding, but it's not worth reallocating it; the result |
| * probably won't live long anyway. |
| */ |
| |
| PG_RETURN_TEXT_P(result); |
| } |
| |
| /******************************************************************** |
| * |
| * ascii |
| * |
| * Syntax: |
| * |
| * int ascii(text string) |
| * |
| * Purpose: |
| * |
| * Returns the decimal representation of the first character from |
| * string. |
| * If the string is empty we return 0. |
| * If the database encoding is UTF8, we return the Unicode codepoint. |
| * If the database encoding is any other multi-byte encoding, we |
| * return the value of the first byte if it is an ASCII character |
| * (range 1 .. 127), or raise an error. |
| * For all other encodings we return the value of the first byte, |
| * (range 1..255). |
| * |
| ********************************************************************/ |
| |
| Datum |
| ascii(PG_FUNCTION_ARGS) |
| { |
| text *string = PG_GETARG_TEXT_PP(0); |
| int encoding = GetDatabaseEncoding(); |
| unsigned char *data; |
| |
| if (VARSIZE_ANY_EXHDR(string) <= 0) |
| PG_RETURN_INT32(0); |
| |
| data = (unsigned char *) VARDATA_ANY(string); |
| |
| if (encoding == PG_UTF8 && *data > 127) |
| { |
| /* return the code point for Unicode */ |
| |
| int result = 0, |
| tbytes = 0, |
| i; |
| |
| if (*data >= 0xF0) |
| { |
| result = *data & 0x07; |
| tbytes = 3; |
| } |
| else if (*data >= 0xE0) |
| { |
| result = *data & 0x0F; |
| tbytes = 2; |
| } |
| else |
| { |
| Assert(*data > 0xC0); |
| result = *data & 0x1f; |
| tbytes = 1; |
| } |
| |
| Assert(tbytes > 0); |
| |
| for (i = 1; i <= tbytes; i++) |
| { |
| Assert((data[i] & 0xC0) == 0x80); |
| result = (result << 6) + (data[i] & 0x3f); |
| } |
| |
| PG_RETURN_INT32(result); |
| } |
| else |
| { |
| if (pg_encoding_max_length(encoding) > 1 && *data > 127) |
| ereport(ERROR, |
| (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
| errmsg("requested character too large"))); |
| |
| |
| PG_RETURN_INT32((int32) *data); |
| } |
| } |
| |
| /******************************************************************** |
| * |
| * chr |
| * |
| * Syntax: |
| * |
| * text chr(int val) |
| * |
| * Purpose: |
| * |
| * Returns the character having the binary equivalent to val. |
| * |
| * For UTF8 we treat the argument as a Unicode code point. |
| * For other multi-byte encodings we raise an error for arguments |
| * outside the strict ASCII range (1..127). |
| * |
| * It's important that we don't ever return a value that is not valid |
| * in the database encoding, so that this doesn't become a way for |
| * invalid data to enter the database. |
| * |
| ********************************************************************/ |
| |
| Datum |
| chr (PG_FUNCTION_ARGS) |
| { |
| int32 arg = PG_GETARG_INT32(0); |
| uint32 cvalue; |
| text *result; |
| int encoding = GetDatabaseEncoding(); |
| |
| /* |
| * Error out on arguments that make no sense or that we can't validly |
| * represent in the encoding. |
| */ |
| if (arg < 0) |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("character number must be positive"))); |
| else if (arg == 0) |
| ereport(ERROR, |
| (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
| errmsg("null character not permitted"))); |
| |
| cvalue = arg; |
| |
| if (encoding == PG_UTF8 && cvalue > 127) |
| { |
| /* for Unicode we treat the argument as a code point */ |
| int bytes; |
| unsigned char *wch; |
| |
| /* |
| * We only allow valid Unicode code points; per RFC3629 that stops at |
| * U+10FFFF, even though 4-byte UTF8 sequences can hold values up to |
| * U+1FFFFF. |
| */ |
| if (cvalue > 0x0010ffff) |
| ereport(ERROR, |
| (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
| errmsg("requested character too large for encoding: %u", |
| cvalue))); |
| |
| if (cvalue > 0xffff) |
| bytes = 4; |
| else if (cvalue > 0x07ff) |
| bytes = 3; |
| else |
| bytes = 2; |
| |
| result = (text *) palloc(VARHDRSZ + bytes); |
| SET_VARSIZE(result, VARHDRSZ + bytes); |
| wch = (unsigned char *) VARDATA(result); |
| |
| if (bytes == 2) |
| { |
| wch[0] = 0xC0 | ((cvalue >> 6) & 0x1F); |
| wch[1] = 0x80 | (cvalue & 0x3F); |
| } |
| else if (bytes == 3) |
| { |
| wch[0] = 0xE0 | ((cvalue >> 12) & 0x0F); |
| wch[1] = 0x80 | ((cvalue >> 6) & 0x3F); |
| wch[2] = 0x80 | (cvalue & 0x3F); |
| } |
| else |
| { |
| wch[0] = 0xF0 | ((cvalue >> 18) & 0x07); |
| wch[1] = 0x80 | ((cvalue >> 12) & 0x3F); |
| wch[2] = 0x80 | ((cvalue >> 6) & 0x3F); |
| wch[3] = 0x80 | (cvalue & 0x3F); |
| } |
| |
| /* |
| * The preceding range check isn't sufficient, because UTF8 excludes |
| * Unicode "surrogate pair" codes. Make sure what we created is valid |
| * UTF8. |
| */ |
| if (!pg_utf8_islegal(wch, bytes)) |
| ereport(ERROR, |
| (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
| errmsg("requested character not valid for encoding: %u", |
| cvalue))); |
| } |
| else |
| { |
| bool is_mb; |
| |
| is_mb = pg_encoding_max_length(encoding) > 1; |
| |
| if ((is_mb && (cvalue > 127)) || (!is_mb && (cvalue > 255))) |
| ereport(ERROR, |
| (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
| errmsg("requested character too large for encoding: %u", |
| cvalue))); |
| |
| result = (text *) palloc(VARHDRSZ + 1); |
| SET_VARSIZE(result, VARHDRSZ + 1); |
| *VARDATA(result) = (char) cvalue; |
| } |
| |
| PG_RETURN_TEXT_P(result); |
| } |
| |
| /******************************************************************** |
| * |
| * repeat |
| * |
| * Syntax: |
| * |
| * text repeat(text string, int val) |
| * |
| * Purpose: |
| * |
| * Repeat string by val. |
| * |
| ********************************************************************/ |
| |
| Datum |
| repeat(PG_FUNCTION_ARGS) |
| { |
| text *string = PG_GETARG_TEXT_PP(0); |
| int32 count = PG_GETARG_INT32(1); |
| text *result; |
| int slen, |
| tlen; |
| int i; |
| char *cp, |
| *sp; |
| |
| if (count < 0) |
| count = 0; |
| |
| slen = VARSIZE_ANY_EXHDR(string); |
| |
| if (unlikely(pg_mul_s32_overflow(count, slen, &tlen)) || |
| unlikely(pg_add_s32_overflow(tlen, VARHDRSZ, &tlen)) || |
| unlikely(!AllocSizeIsValid(tlen))) |
| ereport(ERROR, |
| (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), |
| errmsg("requested length too large"))); |
| |
| result = (text *) palloc(tlen); |
| |
| SET_VARSIZE(result, tlen); |
| cp = VARDATA(result); |
| sp = VARDATA_ANY(string); |
| for (i = 0; i < count; i++) |
| { |
| memcpy(cp, sp, slen); |
| cp += slen; |
| CHECK_FOR_INTERRUPTS(); |
| } |
| |
| PG_RETURN_TEXT_P(result); |
| } |