blob: 21c4afb5faaaaf384d8bc82a872b4e4e4189daf1 [file] [log] [blame]
/*-------------------------------------------------------------------------
*
* varlena.c
* Functions for the variable-length built-in types.
*
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.154 2007/01/05 22:19:42 momjian Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <ctype.h>
#include "access/tupmacs.h"
#include "access/tuptoaster.h"
#include "catalog/pg_type.h"
#include "libpq/md5.h"
#include "libpq/pqformat.h"
#include "miscadmin.h"
#include "parser/scansup.h"
#include "regex/regex.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/pg_locale.h"
#include "utils/string_wrapper.h"
#include "utils/memutils.h"
typedef struct varlena unknown;
typedef struct
{
bool use_wchar; /* T if multibyte encoding */
char *str1; /* use these if not use_wchar */
char *str2; /* note: these point to original texts */
pg_wchar *wstr1; /* use these if use_wchar */
pg_wchar *wstr2; /* note: these are palloc'd */
int len1; /* string lengths in logical characters */
int len2;
} TextPositionState;
#define DatumGetUnknownP(X) ((unknown *) PG_DETOAST_DATUM(X))
#define DatumGetUnknownPCopy(X) ((unknown *) PG_DETOAST_DATUM_COPY(X))
#define PG_GETARG_UNKNOWN_P(n) DatumGetUnknownP(PG_GETARG_DATUM(n))
#define PG_GETARG_UNKNOWN_P_COPY(n) DatumGetUnknownPCopy(PG_GETARG_DATUM(n))
#define PG_RETURN_UNKNOWN_P(x) PG_RETURN_POINTER(x)
#define PG_TEXTARG_GET_STR(arg_) \
DatumGetCString(DirectFunctionCall1(textout, PG_GETARG_DATUM(arg_)))
#define PG_TEXT_GET_STR(textp_) \
DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(textp_)))
#define PG_STR_GET_TEXT(str_) \
DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(str_)))
/*
* Max considered sub-string size is set to MaxAllocSize - 4MB).
* The 4MB is saved aside for memory allocation overhead such
* as allocation set headers.
*/
#define MAX_STRING_BYTES ((Size) (MaxAllocSize - 0x400000))
static int text_position_ptr_len(char* p1, int len1, char *p2, int len2);
static void text_position_setup_ptr_len(char* p1, int len1, char* p2, int len2, TextPositionState *state);
static int text_position_next(int start_pos, TextPositionState *state);
static void text_position_cleanup(TextPositionState *state);
static text *text_substring(Datum str,
int32 start,
int32 length,
bool length_not_specified);
static void appendStringInfoText(StringInfo str, const text *t);
/*****************************************************************************
* CONVERSION ROUTINES EXPORTED FOR USE BY C CODE *
*****************************************************************************/
/*
* cstring_to_text
*
* Create a text value from a null-terminated C string.
*
* The new text value is freshly palloc'd with a full-size VARHDR.
*/
text *
cstring_to_text(const char *s)
{
return cstring_to_text_with_len(s, strlen(s));
}
/*
* cstring_to_text_with_len
*
* Same as cstring_to_text except the caller specifies the string length;
* the string need not be null_terminated.
*/
text *
cstring_to_text_with_len(const char *s, int len)
{
text *result = (text *) palloc(len + VARHDRSZ);
SET_VARSIZE(result, len + VARHDRSZ);
memcpy(VARDATA(result), s, len);
return result;
}
/*
* text_to_cstring
*
* Create a palloc'd, null-terminated C string from a text value.
*
* We support being passed a compressed or toasted text value.
* This is a bit bogus since such values shouldn't really be referred to as
* "text *", but it seems useful for robustness. If we didn't handle that
* case here, we'd need another routine that did, anyway.
*/
char *
text_to_cstring(const text *t)
{
/* must cast away the const, unfortunately */
text *tunpacked = pg_detoast_datum_packed((struct varlena *) t);
int len = VARSIZE_ANY_EXHDR(tunpacked);
char *result;
result = (char *) palloc(len + 1);
memcpy(result, VARDATA_ANY(tunpacked), len);
result[len] = '\0';
if (tunpacked != t)
pfree(tunpacked);
return result;
}
/*
* text_to_cstring_buffer
*
* Copy a text value into a caller-supplied buffer of size dst_len.
*
* The text string is truncated if necessary to fit. The result is
* guaranteed null-terminated (unless dst_len == 0).
*
* We support being passed a compressed or toasted text value.
* This is a bit bogus since such values shouldn't really be referred to as
* "text *", but it seems useful for robustness. If we didn't handle that
* case here, we'd need another routine that did, anyway.
*/
void
text_to_cstring_buffer(const text *src, char *dst, size_t dst_len)
{
/* must cast away the const, unfortunately */
text *srcunpacked = pg_detoast_datum_packed((struct varlena *) src);
size_t src_len = VARSIZE_ANY_EXHDR(srcunpacked);
if (dst_len > 0)
{
dst_len--;
if (dst_len >= src_len)
dst_len = src_len;
else /* ensure truncation is encoding-safe */
dst_len = pg_mbcliplen(VARDATA_ANY(srcunpacked), src_len, dst_len);
memcpy(dst, VARDATA_ANY(srcunpacked), dst_len);
dst[dst_len] = '\0';
}
if (srcunpacked != src)
pfree(srcunpacked);
}
/*****************************************************************************
* USER I/O ROUTINES *
*****************************************************************************/
#define VAL(CH) ((CH) - '0')
#define DIG(VAL) ((VAL) + '0')
/*
* byteain - converts from printable representation of byte array
*
* Non-printable characters must be passed as '\nnn' (octal) and are
* converted to internal form. '\' must be passed as '\\'.
* ereport(ERROR, ...) if bad form.
*
* BUGS:
* The input is scanned twice.
* The error checking of input is minimal.
*/
Datum
byteain(PG_FUNCTION_ARGS)
{
char *inputText = PG_GETARG_CSTRING(0);
char *tp;
char *rp;
int byte;
bytea *result;
for (byte = 0, tp = inputText; *tp != '\0'; byte++)
{
if (tp[0] != '\\')
tp++;
else if ((tp[0] == '\\') &&
(tp[1] >= '0' && tp[1] <= '3') &&
(tp[2] >= '0' && tp[2] <= '7') &&
(tp[3] >= '0' && tp[3] <= '7'))
tp += 4;
else if ((tp[0] == '\\') &&
(tp[1] == '\\'))
tp += 2;
else
{
/*
* one backslash, not followed by 0 or ### valid octal
*/
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type bytea"),
errOmitLocation(true)));
}
}
byte += VARHDRSZ;
result = (bytea *) palloc(byte);
SET_VARSIZE(result, byte);
tp = inputText;
rp = VARDATA(result);
while (*tp != '\0')
{
if (tp[0] != '\\')
*rp++ = *tp++;
else if ((tp[0] == '\\') &&
(tp[1] >= '0' && tp[1] <= '3') &&
(tp[2] >= '0' && tp[2] <= '7') &&
(tp[3] >= '0' && tp[3] <= '7'))
{
byte = VAL(tp[1]);
byte <<= 3;
byte += VAL(tp[2]);
byte <<= 3;
*rp++ = byte + VAL(tp[3]);
tp += 4;
}
else if ((tp[0] == '\\') &&
(tp[1] == '\\'))
{
*rp++ = '\\';
tp += 2;
}
else
{
/*
* We should never get here. The first pass should not allow it.
*/
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type bytea"),
errOmitLocation(true)));
}
}
PG_RETURN_BYTEA_P(result);
}
/*
* byteaout - converts to printable representation of byte array
*
* Non-printable characters are inserted as '\nnn' (octal) and '\' as
* '\\'.
*
* NULL vlena should be an error--returning string with NULL for now.
*/
Datum
byteaout(PG_FUNCTION_ARGS)
{
bytea *vlena = PG_GETARG_BYTEA_P(0);
char *result;
char *vp;
char *rp;
int val; /* holds unprintable chars */
int i;
int len;
len = 1; /* empty string has 1 char */
vp = VARDATA_ANY(vlena);
for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
{
if (*vp == '\\')
len += 2;
else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
len += 4;
else
len++;
}
rp = result = (char *) palloc(len);
vp = VARDATA_ANY(vlena);
for (i = VARSIZE_ANY_EXHDR(vlena); i != 0; i--, vp++)
{
if (*vp == '\\')
{
*rp++ = '\\';
*rp++ = '\\';
}
else if ((unsigned char) *vp < 0x20 || (unsigned char) *vp > 0x7e)
{
val = *vp;
rp[0] = '\\';
rp[3] = DIG(val & 07);
val >>= 3;
rp[2] = DIG(val & 07);
val >>= 3;
rp[1] = DIG(val & 03);
rp += 4;
}
else
*rp++ = *vp;
}
*rp = '\0';
PG_RETURN_CSTRING(result);
}
/*
* bytearecv - converts external binary format to bytea
*/
Datum
bytearecv(PG_FUNCTION_ARGS)
{
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
bytea *result;
int nbytes;
nbytes = buf->len - buf->cursor;
result = (bytea *) palloc(nbytes + VARHDRSZ);
SET_VARSIZE(result, nbytes + VARHDRSZ);
pq_copymsgbytes(buf, VARDATA(result), nbytes);
PG_RETURN_BYTEA_P(result);
}
/*
* byteasend - converts bytea to binary format
*
* This is a special case: just copy the input...
*/
Datum
byteasend(PG_FUNCTION_ARGS)
{
bytea *vlena = PG_GETARG_BYTEA_P_COPY(0);
PG_RETURN_BYTEA_P(vlena);
}
/*
* textin - converts "..." to internal representation
*/
Datum
textin(PG_FUNCTION_ARGS)
{
char *inputText = PG_GETARG_CSTRING(0);
text *result;
int len;
len = strlen(inputText);
result = (text *) palloc(len + VARHDRSZ);
SET_VARSIZE(result, len + VARHDRSZ);
memcpy(VARDATA(result), inputText, len);
PG_RETURN_TEXT_P(result);
}
/*
* textout - converts internal representation to "..."
*/
Datum
textout(PG_FUNCTION_ARGS)
{
char *result;
Datum d = PG_GETARG_DATUM(0);
char *p; void *tofree; int len;
varattrib_untoast_ptr_len(d, &p, &len, &tofree);
result = (char *) palloc(len + 1);
memcpy(result, p, len);
result[len] = '\0';
if(tofree != NULL)
pfree(tofree);
PG_RETURN_CSTRING(result);
}
/*
* textrecv - converts external binary format to text
*/
Datum
textrecv(PG_FUNCTION_ARGS)
{
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
text *result;
char *str;
int nbytes;
str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
result = (text *) palloc(nbytes + VARHDRSZ);
SET_VARSIZE(result, nbytes + VARHDRSZ);
memcpy(VARDATA(result), str, nbytes);
pfree(str);
PG_RETURN_TEXT_P(result);
}
/*
* textsend - converts text to binary format
*/
Datum
textsend(PG_FUNCTION_ARGS)
{
text *t = PG_GETARG_TEXT_PP(0);
StringInfoData buf;
pq_begintypsend(&buf);
pq_sendtext(&buf, VARDATA_ANY(t), VARSIZE_ANY_EXHDR(t));
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}
/*
* unknownin - converts "..." to internal representation
*/
Datum
unknownin(PG_FUNCTION_ARGS)
{
char *str = PG_GETARG_CSTRING(0);
/* representation is same as cstring */
PG_RETURN_CSTRING(pstrdup(str));
}
/*
* unknownout - converts internal representation to "..."
*/
Datum
unknownout(PG_FUNCTION_ARGS)
{
/* representation is same as cstring */
char *str = PG_GETARG_CSTRING(0);
PG_RETURN_CSTRING(pstrdup(str));
}
/*
* unknownrecv - converts external binary format to unknown
*/
Datum
unknownrecv(PG_FUNCTION_ARGS)
{
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
char *str;
int nbytes;
str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
/* representation is same as cstring */
PG_RETURN_CSTRING(str);
}
/*
* unknownsend - converts unknown to binary format
*/
Datum
unknownsend(PG_FUNCTION_ARGS)
{
/* representation is same as cstring */
char *str = PG_GETARG_CSTRING(0);
StringInfoData buf;
pq_begintypsend(&buf);
pq_sendtext(&buf, str, strlen(str));
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}
/* ========== PUBLIC ROUTINES ========== */
/*
* text_length -
* Does the real work for textlen()
*
* This is broken out so it can be called directly by other string processing
* functions. Note that the argument is passed as a Datum, to indicate that
* it may still be in compressed form. We can avoid decompressing it at all
* in some cases.
*/
static inline int32
text_length(Datum str)
{
/* fastpath when max encoding length is one */
if (pg_database_encoding_max_length() == 1)
PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
else
{
int32 ret;
char *p; void *tofree; int len;
varattrib_untoast_ptr_len(str, &p, &len, &tofree);
ret = pg_mbstrlen_with_len(p, len);
if(tofree)
pfree(tofree);
PG_RETURN_INT32(ret);
}
}
/*
* textlen -
* returns the logical length of a text*
* (which is less than the VARSIZE of the text*)
*/
Datum
textlen(PG_FUNCTION_ARGS)
{
Datum str = PG_GETARG_DATUM(0);
/* try to avoid decompressing argument */
PG_RETURN_INT32(text_length(str));
}
/*
* textoctetlen -
* returns the physical length of a text*
* (which is less than the VARSIZE of the text*)
*/
Datum
textoctetlen(PG_FUNCTION_ARGS)
{
Datum str = PG_GETARG_DATUM(0);
/* We need not detoast the input at all */
PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
}
/*
* Generaic concatenation of two varlena. The varlena may comes in differnt
* flavors, but they are really the same. Duplicate the body of the function makes
* no sense.
*/
static inline Datum generic_varlena_cat(PG_FUNCTION_ARGS)
{
Datum d0 = PG_GETARG_DATUM(0);
char *p0; void *tofree0; int len0;
Datum d1 = PG_GETARG_DATUM(1);
char *p1; void *tofree1; int len1;
int len;
text *result;
char *ptr;
varattrib_untoast_ptr_len(d0, &p0, &len0, &tofree0);
varattrib_untoast_ptr_len(d1, &p1, &len1, &tofree1);
len = len0 + len1 + VARHDRSZ;
result = (text *) palloc(len);
/* Set size of result string... */
SET_VARSIZE(result, len);
/* Fill data field of result string... */
ptr = VARDATA(result);
if (len0 > 0)
memcpy(ptr, p0, len0);
if (len1 > 0)
memcpy(ptr + len0, p1, len1);
PG_RETURN_TEXT_P(result);
}
/*
* textcat -
* takes two text* and returns a text* that is the concatenation of
* the two.
*
* Rewritten by Sapa, sapa@hq.icb.chel.su. 8-Jul-96.
* Updated by Thomas, Thomas.Lockhart@jpl.nasa.gov 1997-07-10.
* Allocate space for output in all cases.
* XXX - thomas 1997-07-10
*/
Datum
textcat(PG_FUNCTION_ARGS)
{
return generic_varlena_cat(fcinfo);
}
/*
* charlen_to_bytelen()
* Compute the number of bytes occupied by n characters starting at *p
*
* It is caller's responsibility that there actually are n characters;
* the string need not be null-terminated.
*/
static int
charlen_to_bytelen(const char *p, int n)
{
if (pg_database_encoding_max_length() == 1)
{
/* Optimization for single-byte encodings */
return n;
}
else
{
const char *s;
for (s = p; n > 0; n--)
s += pg_mblen(s);
return s - p;
}
}
/* find_memory_limited_substring()
* Computes the sub-string length in number of characters and number
* of bytes where the sub-string consumes up to "memoryLimit" amount of memory.
*
* Parameters:
* strStart: starting pointer in the string
* byteLen: number of bytes in the string, starting from strStart
* memoryLimit: max string size in terms of bytes
*
* Out parameters:
* subStringByteLen: length of chosen sub-string in bytes
* subStringCharLen: length of chosen sub-string in character count
*
* It is caller's responsibility that there actually are byteLen bytes
* starting from strStart; the string needs not be null-terminated.
*/
static void
find_memory_limited_substring(const char *strStart, int byteLen, int memoryLimit, int *subStringByteLen, int *subStringCharLen)
{
AssertArg(byteLen > memoryLimit);
AssertArg(NULL != strStart);
AssertArg(NULL != subStringCharLen);
if (pg_database_encoding_max_length() == 1)
{
/* Optimization for single-byte encodings */
*subStringByteLen = byteLen < memoryLimit ? byteLen : memoryLimit;
*subStringCharLen = *subStringByteLen;
return;
}
else
{
const char *strCurPointer = strStart;;
int consumedBytes = 0;
int consumedChars = 0;
while (consumedBytes <= byteLen)
{
int curCharBytes = pg_mblen(strCurPointer);
strCurPointer += curCharBytes;
consumedChars++;
consumedBytes += curCharBytes;
if (consumedBytes > memoryLimit)
{
*subStringByteLen = consumedBytes - curCharBytes;
*subStringCharLen = consumedChars - 1;
Insist((*subStringByteLen > 0) && (*subStringCharLen > 0));
return;
}
}
}
}
/*
* text_substr()
* Return a substring starting at the specified position.
* - thomas 1997-12-31
*
* Input:
* - string
* - starting position (is one-based)
* - string length
*
* If the starting position is zero or less, then return from the start of the string
* adjusting the length to be consistent with the "negative start" per SQL92.
* If the length is less than zero, return the remaining string.
*
* Added multibyte support.
* - Tatsuo Ishii 1998-4-21
* Changed behavior if starting position is less than one to conform to SQL92 behavior.
* Formerly returned the entire string; now returns a portion.
* - Thomas Lockhart 1998-12-10
* Now uses faster TOAST-slicing interface
* - John Gray 2002-02-22
* Remove "#ifdef MULTIBYTE" and test for encoding_max_length instead. Change
* behaviors conflicting with SQL92 to meet SQL92 (if E = S + L < S throw
* error; if E < 1, return '', not entire string). Fixed MB related bug when
* S > LC and < LC + 4 sometimes garbage characters are returned.
* - Joe Conway 2002-08-10
*/
Datum
text_substr(PG_FUNCTION_ARGS)
{
PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
PG_GETARG_INT32(1),
PG_GETARG_INT32(2),
false));
}
/*
* text_substr_no_len -
* Wrapper to avoid opr_sanity failure due to
* one function accepting a different number of args.
*/
Datum
text_substr_no_len(PG_FUNCTION_ARGS)
{
PG_RETURN_TEXT_P(text_substring(PG_GETARG_DATUM(0),
PG_GETARG_INT32(1),
-1, true));
}
/*
* text_substring -
* Does the real work for text_substr() and text_substr_no_len()
*
* This is broken out so it can be called directly by other string processing
* functions. Note that the argument is passed as a Datum, to indicate that
* it may still be in compressed/toasted form. We can avoid detoasting all
* of it in some cases.
*
* The result is always a freshly palloc'd datum.
*/
static text *
text_substring(Datum str, int32 start, int32 length, bool length_not_specified)
{
int32 eml = pg_database_encoding_max_length();
int32 S = start; /* start position */
int32 S1; /* adjusted start position */
int32 L1; /* adjusted substring length */
/* life is easy if the encoding max length is 1 */
if (eml == 1)
{
S1 = Max(S, 1);
if (length_not_specified) /* special case - get length to end of
* string */
L1 = -1;
else
{
/* end position */
int E = S + length;
/*
* A negative value for L is the only way for the end position to
* be before the start. SQL99 says to throw an error.
*/
if (E < S)
ereport(ERROR,
(errcode(ERRCODE_SUBSTRING_ERROR),
errmsg("negative substring length not allowed"),
errOmitLocation(true)));
/*
* A zero or negative value for the end position can happen if the
* start was negative or one. SQL99 says to return a zero-length
* string.
*/
if (E < 1)
return cstring_to_text("");
L1 = E - S1;
}
/*
* If the start position is past the end of the string, SQL99 says to
* return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do
* that for us. Convert to zero-based starting position
*/
return DatumGetTextPSlice(str, S1 - 1, L1);
}
else if (eml > 1)
{
/*
* When encoding max length is > 1, we can't get LC without
* detoasting, so we'll grab a conservatively large slice now and go
* back later to do the right thing
*/
int32 slice_start;
int32 slice_size;
int32 slice_strlen;
text *slice;
int32 E1;
int32 i;
char *p;
char *s;
text *ret;
/*
* if S is past the end of the string, the tuple toaster will return a
* zero-length string to us
*/
S1 = Max(S, 1);
/*
* We need to start at position zero because there is no way to know
* in advance which byte offset corresponds to the supplied start
* position.
*/
slice_start = 0;
if (length_not_specified) /* special case - get length to end of
* string */
slice_size = L1 = -1;
else
{
int E = S + length;
/*
* A negative value for L is the only way for the end position to
* be before the start. SQL99 says to throw an error.
*/
if (E < S)
ereport(ERROR,
(errcode(ERRCODE_SUBSTRING_ERROR),
errmsg("negative substring length not allowed"),
errOmitLocation(true)));
/*
* A zero or negative value for the end position can happen if the
* start was negative or one. SQL99 says to return a zero-length
* string.
*/
if (E < 1)
return cstring_to_text("");
/*
* if E is past the end of the string, the tuple toaster will
* truncate the length for us
*/
L1 = E - S1;
/*
* Total slice size in bytes can't be any longer than the start
* position plus substring length times the encoding max length.
*/
slice_size = (S1 + L1) * eml;
}
/*
* If we're working with an untoasted source, no need to do an extra
* copying step.
*/
if (VARATT_IS_COMPRESSED(DatumGetPointer(str)) ||
VARATT_IS_EXTERNAL(DatumGetPointer(str)))
slice = DatumGetTextPSlice(str, slice_start, slice_size);
else
slice = (text *) DatumGetPointer(str);
/* see if we got back an empty string */
if (VARSIZE_ANY_EXHDR(slice) == 0)
{
if (slice != (text *) DatumGetPointer(str))
pfree(slice);
return cstring_to_text("");
}
/* Now we can get the actual length of the slice in MB characters */
slice_strlen = pg_mbstrlen_with_len(VARDATA_ANY(slice),
VARSIZE_ANY_EXHDR(slice));
/*
* Check that the start position wasn't > slice_strlen. If so, SQL99
* says to return a zero-length string.
*/
if (S1 > slice_strlen)
{
if (slice != (text *) DatumGetPointer(str))
pfree(slice);
return cstring_to_text("");
}
/*
* Adjust L1 and E1 now that we know the slice string length. Again
* remember that S1 is one based, and slice_start is zero based.
*/
if (L1 > -1)
E1 = Min(S1 + L1, slice_start + 1 + slice_strlen);
else
E1 = slice_start + 1 + slice_strlen;
/*
* Find the start position in the slice; remember S1 is not zero based
*/
p = VARDATA_ANY(slice);
for (i = 0; i < S1 - 1; i++)
p += pg_mblen(p);
/* hang onto a pointer to our start position */
s = p;
/*
* Count the actual bytes used by the substring of the requested
* length.
*/
for (i = S1; i < E1; i++)
p += pg_mblen(p);
ret = (text *) palloc(VARHDRSZ + (p - s));
SET_VARSIZE(ret, VARHDRSZ + (p - s));
memcpy(VARDATA(ret), s, (p - s));
if (slice != (text *) DatumGetPointer(str))
pfree(slice);
return ret;
}
else
elog(ERROR, "invalid backend encoding: encoding max length < 1");
/* not reached: suppress compiler warning */
return NULL;
}
/*
* textpos -
* Return the position of the specified substring.
* Implements the SQL92 POSITION() function.
* Ref: A Guide To The SQL Standard, Date & Darwen, 1997
* - thomas 1997-07-27
*/
Datum
textpos(PG_FUNCTION_ARGS)
{
Datum d0 = PG_GETARG_DATUM(0);
char *p0; void *tofree0; int len0;
Datum d1 = PG_GETARG_DATUM(1);
char *p1; void *tofree1; int len1;
int32 pos;
varattrib_untoast_ptr_len(d0, &p0, &len0, &tofree0);
varattrib_untoast_ptr_len(d1, &p1, &len1, &tofree1);
pos = text_position_ptr_len(p0, len0, p1, len1);
if(tofree0)
pfree(tofree0);
if(tofree1)
pfree(tofree1);
PG_RETURN_INT32(pos);
}
/*
* text_position -
* Does the real work for textpos()
*
* Inputs:
* p1, len1 - string to be searched
* p2, len2 - pattern to match within t1
* Result:
* Character index of the first matched char, starting from 1,
* or 0 if no match.
*
* This is broken out so it can be called directly by other string processing
* functions.
*/
static int
text_position_ptr_len(char* p1, int len1, char* p2, int len2)
{
TextPositionState state =
{
0, /* use_wchar */
NULL, /* str1 */
NULL, /* str2 */
NULL, /* wstr1 */
NULL, /* wstr2 */
0, /* len1 */
0, /* len2 */
};
int result;
text_position_setup_ptr_len(p1, len1, p2, len2, &state);
result = text_position_next(1, &state);
text_position_cleanup(&state);
return result;
}
/*
* text_position_setup, text_position_next, text_position_cleanup -
* Component steps of text_position()
*
* These are broken out so that a string can be efficiently searched for
* multiple occurrences of the same pattern. text_position_next may be
* called multiple times with increasing values of start_pos, which is
* the 1-based character position to start the search from. The "state"
* variable is normally just a local variable in the caller.
*/
/* Set up text postion, using pointer and len. */
static void
text_position_setup_ptr_len(char* p1, int len1, char* p2, int len2, TextPositionState *state)
{
if (pg_database_encoding_max_length() == 1)
{
/* simple case - single byte encoding */
state->use_wchar = false;
state->str1 = p1;
state->str2 = p2;
state->len1 = len1;
state->len2 = len2;
}
else
{
/* not as simple - multibyte encoding */
pg_wchar *wp1,
*wp2;
wp1 = (pg_wchar *) palloc((len1 + 1) * sizeof(pg_wchar));
len1 = pg_mb2wchar_with_len(p1, wp1, len1);
wp2 = (pg_wchar *) palloc((len2 + 1) * sizeof(pg_wchar));
len2 = pg_mb2wchar_with_len(p2, wp2, len2);
state->use_wchar = true;
state->wstr1 = wp1;
state->wstr2 = wp2;
state->len1 = len1;
state->len2 = len2;
}
}
static int
text_position_next(int start_pos, TextPositionState *state)
{
int pos = 0,
p,
px;
Assert(start_pos > 0); /* else caller error */
if (state->len2 <= 0)
return start_pos; /* result for empty pattern */
if (!state->use_wchar)
{
/* simple case - single byte encoding */
char *p1 = state->str1;
char *p2 = state->str2;
/* no use in searching str past point where search_str will fit */
px = (state->len1 - state->len2);
p1 += start_pos - 1;
for (p = start_pos - 1; p <= px; p++)
{
if ((*p1 == *p2) && (strncmp(p1, p2, state->len2) == 0))
{
pos = p + 1;
break;
}
p1++;
}
}
else
{
/* not as simple - multibyte encoding */
pg_wchar *p1 = state->wstr1;
pg_wchar *p2 = state->wstr2;
/* no use in searching str past point where search_str will fit */
px = (state->len1 - state->len2);
p1 += start_pos - 1;
for (p = start_pos - 1; p <= px; p++)
{
if ((*p1 == *p2) && (pg_wchar_strncmp(p1, p2, state->len2) == 0))
{
pos = p + 1;
break;
}
p1++;
}
}
return pos;
}
static void
text_position_cleanup(TextPositionState *state)
{
if (state->use_wchar)
{
pfree(state->wstr1);
pfree(state->wstr2);
}
}
/* varstr_cmp()
* Comparison function for text strings with given lengths.
* Includes locale support, but must copy strings to temporary memory
* to allow null-termination for inputs to strcoll().
* Returns -1, 0 or 1
*/
int
varstr_cmp(char *arg1, int len1, char *arg2, int len2)
{
int result;
/*
* Unfortunately, there is no strncoll(), so in the non-C locale case we
* have to do some memory copying. This turns out to be significantly
* slower, so we optimize the case where LC_COLLATE is C. We also try to
* optimize relatively-short strings by avoiding palloc/pfree overhead.
*/
if (lc_collate_is_c())
{
result = strncmp(arg1, arg2, Min(len1, len2));
if ((result == 0) && (len1 != len2))
result = (len1 < len2) ? -1 : 1;
}
else
{
#define STACKBUFLEN 1024
char a1buf[STACKBUFLEN];
char a2buf[STACKBUFLEN];
char *a1p,
*a2p;
#ifdef WIN32
/* Win32 does not have UTF-8, so we need to map to UTF-16 */
if (GetDatabaseEncoding() == PG_UTF8)
{
int a1len;
int a2len;
int r;
if (len1 >= STACKBUFLEN / 2)
{
a1len = len1 * 2 + 2;
a1p = palloc(a1len);
}
else
{
a1len = STACKBUFLEN;
a1p = a1buf;
}
if (len2 >= STACKBUFLEN / 2)
{
a2len = len2 * 2 + 2;
a2p = palloc(a2len);
}
else
{
a2len = STACKBUFLEN;
a2p = a2buf;
}
/* stupid Microsloth API does not work for zero-length input */
if (len1 == 0)
r = 0;
else
{
r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
(LPWSTR) a1p, a1len / 2);
if (!r)
ereport(ERROR,
(errmsg("could not convert string to UTF-16: error %lu",
GetLastError())));
}
((LPWSTR) a1p)[r] = 0;
if (len2 == 0)
r = 0;
else
{
r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
(LPWSTR) a2p, a2len / 2);
if (!r)
ereport(ERROR,
(errmsg("could not convert string to UTF-16: error %lu",
GetLastError())));
}
((LPWSTR) a2p)[r] = 0;
errno = 0;
result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
if (result == 2147483647) /* _NLSCMPERROR; missing from mingw
* headers */
ereport(ERROR,
(errmsg("could not compare Unicode strings: %m")));
/*
* In some locales wcscoll() can claim that nonidentical strings
* are equal. Believing that would be bad news for a number of
* reasons, so we follow Perl's lead and sort "equal" strings
* according to strcmp (on the UTF-8 representation).
*/
if (result == 0)
{
result = strncmp(arg1, arg2, Min(len1, len2));
if ((result == 0) && (len1 != len2))
result = (len1 < len2) ? -1 : 1;
}
if (a1p != a1buf)
pfree(a1p);
if (a2p != a2buf)
pfree(a2p);
return result;
}
#endif /* WIN32 */
if (len1 >= STACKBUFLEN)
a1p = (char *) palloc(len1 + 1);
else
a1p = a1buf;
if (len2 >= STACKBUFLEN)
a2p = (char *) palloc(len2 + 1);
else
a2p = a2buf;
memcpy(a1p, arg1, len1);
a1p[len1] = '\0';
memcpy(a2p, arg2, len2);
a2p[len2] = '\0';
result = gp_strcoll(a1p, a2p);
/*
* In some locales strcoll() can claim that nonidentical strings are
* equal. Believing that would be bad news for a number of reasons,
* so we follow Perl's lead and sort "equal" strings according to
* strcmp().
*/
if (result == 0)
result = strcmp(a1p, a2p);
if (a1p != a1buf)
pfree(a1p);
if (a2p != a2buf)
pfree(a2p);
}
return result;
}
static inline int
text_cmp_datum(Datum d0, Datum d1)
{
char *p0; void *tofree0; int len0;
char *p1; void *tofree1; int len1;
int result;
varattrib_untoast_ptr_len(d0, &p0, &len0, &tofree0);
varattrib_untoast_ptr_len(d1, &p1, &len1, &tofree1);
result = varstr_cmp(p0, len0, p1, len1);
if(tofree0)
pfree(tofree0);
if(tofree1)
pfree(tofree1);
return result;
}
static inline Datum generic_varlena_eq(PG_FUNCTION_ARGS)
{
bool result;
Datum d0 = PG_GETARG_DATUM(0);
char *p0; void *tofree0; int len0;
Datum d1 = PG_GETARG_DATUM(1);
char *p1; void *tofree1; int len1;
varattrib_untoast_ptr_len(d0, &p0, &len0, &tofree0);
varattrib_untoast_ptr_len(d1, &p1, &len1, &tofree1);
/*
* Since we only care about equality or not-equality, we can avoid all the
* expense of strcoll() here, and just do bitwise comparison.
*/
if(len0 != len1)
result = false;
else
result = (memcmp(p0, p1, len1) == 0);
if(tofree0)
pfree(tofree0);
if(tofree1)
pfree(tofree1);
PG_RETURN_BOOL(result);
}
/*
* Comparison functions for text strings.
*
* Note: btree indexes need these routines not to leak memory; therefore,
* be careful to free working copies of toasted datums. Most places don't
* need to be so careful.
*/
Datum
texteq(PG_FUNCTION_ARGS)
{
return generic_varlena_eq(fcinfo);
}
Datum
textne(PG_FUNCTION_ARGS)
{
Datum d = generic_varlena_eq(fcinfo);
return (d==0 ? 1 : 0);
}
Datum
text_lt(PG_FUNCTION_ARGS)
{
Datum d0 = PG_GETARG_DATUM(0);
Datum d1 = PG_GETARG_DATUM(1);
bool result = (text_cmp_datum(d0, d1) < 0);
PG_RETURN_BOOL(result);
}
Datum
text_le(PG_FUNCTION_ARGS)
{
Datum d0 = PG_GETARG_DATUM(0);
Datum d1 = PG_GETARG_DATUM(1);
bool result = (text_cmp_datum(d0, d1) <= 0);
PG_RETURN_BOOL(result);
}
Datum
text_gt(PG_FUNCTION_ARGS)
{
Datum d0 = PG_GETARG_DATUM(0);
Datum d1 = PG_GETARG_DATUM(1);
bool result = (text_cmp_datum(d0, d1) > 0);
PG_RETURN_BOOL(result);
}
Datum
text_ge(PG_FUNCTION_ARGS)
{
Datum d0 = PG_GETARG_DATUM(0);
Datum d1 = PG_GETARG_DATUM(1);
bool result = (text_cmp_datum(d0, d1) >= 0);
PG_RETURN_BOOL(result);
}
Datum
bttextcmp(PG_FUNCTION_ARGS)
{
Datum d0 = PG_GETARG_DATUM(0);
Datum d1 = PG_GETARG_DATUM(1);
int result = text_cmp_datum(d0, d1);
PG_RETURN_INT32(result);
}
Datum
text_larger(PG_FUNCTION_ARGS)
{
Datum d0 = PG_GETARG_DATUM(0);
Datum d1 = PG_GETARG_DATUM(1);
int cmp = text_cmp_datum(d0, d1);
return (cmp > 0 ? d0 : d1);
}
Datum
text_smaller(PG_FUNCTION_ARGS)
{
Datum d0 = PG_GETARG_DATUM(0);
Datum d1 = PG_GETARG_DATUM(1);
int cmp = text_cmp_datum(d0, d1);
return (cmp < 0 ? d0 : d1);
}
/*
* The following operators support character-by-character comparison
* of text data types, to allow building indexes suitable for LIKE
* clauses.
*/
static inline int generic_varlena_cmp(PG_FUNCTION_ARGS)
{
Datum d0 = PG_GETARG_DATUM(0);
Datum d1 = PG_GETARG_DATUM(1);
int result;
char *p0; void *tofree0; int len0;
char *p1; void *tofree1; int len1;
varattrib_untoast_ptr_len(d0, &p0, &len0, &tofree0);
varattrib_untoast_ptr_len(d1, &p1, &len1, &tofree1);
result = memcmp(p0, p1, Min(len0, len1));
if(tofree0)
pfree(tofree0);
if(tofree1)
pfree(tofree1);
if(result != 0)
return result;
if (len0 < len1)
return -1;
else if (len0 > len1)
return 1;
else
return 0;
}
Datum
text_pattern_lt(PG_FUNCTION_ARGS)
{
int result = generic_varlena_cmp(fcinfo);
PG_RETURN_BOOL(result < 0);
}
Datum
text_pattern_le(PG_FUNCTION_ARGS)
{
int result = generic_varlena_cmp(fcinfo);
PG_RETURN_BOOL(result <= 0);
}
Datum
text_pattern_ge(PG_FUNCTION_ARGS)
{
int result = generic_varlena_cmp(fcinfo);
PG_RETURN_BOOL(result >= 0);
}
Datum
text_pattern_gt(PG_FUNCTION_ARGS)
{
int result = generic_varlena_cmp(fcinfo);
PG_RETURN_BOOL(result > 0);
}
Datum
bttext_pattern_cmp(PG_FUNCTION_ARGS)
{
int result = generic_varlena_cmp(fcinfo);
PG_RETURN_INT32(result);
}
/*-------------------------------------------------------------
* byteaoctetlen
*
* get the number of bytes contained in an instance of type 'bytea'
*-------------------------------------------------------------
*/
Datum
byteaoctetlen(PG_FUNCTION_ARGS)
{
Datum str = PG_GETARG_DATUM(0);
/* We need not detoast the input at all */
PG_RETURN_INT32(toast_raw_datum_size(str) - VARHDRSZ);
}
/*
* byteacat -
* takes two bytea* and returns a bytea* that is the concatenation of
* the two.
*
* Cloned from textcat and modified as required.
*/
Datum
byteacat(PG_FUNCTION_ARGS)
{
return generic_varlena_cat(fcinfo);
}
#define PG_STR_GET_BYTEA(str_) \
DatumGetByteaP(DirectFunctionCall1(byteain, CStringGetDatum(str_)))
/*
* bytea_substr()
* Return a substring starting at the specified position.
* Cloned from text_substr and modified as required.
*
* Input:
* - string
* - starting position (is one-based)
* - string length (optional)
*
* If the starting position is zero or less, then return from the start of the string
* adjusting the length to be consistent with the "negative start" per SQL92.
* If the length is less than zero, an ERROR is thrown. If no third argument
* (length) is provided, the length to the end of the string is assumed.
*/
Datum
bytea_substr(PG_FUNCTION_ARGS)
{
int S = PG_GETARG_INT32(1); /* start position */
int S1; /* adjusted start position */
int L1; /* adjusted substring length */
S1 = Max(S, 1);
if (fcinfo->nargs == 2)
{
/*
* Not passed a length - PG_GETARG_BYTEA_P_SLICE() grabs everything to
* the end of the string if we pass it a negative value for length.
*/
L1 = -1;
}
else
{
/* end position */
int E = S + PG_GETARG_INT32(2);
/*
* A negative value for L is the only way for the end position to be
* before the start. SQL99 says to throw an error.
*/
if (E < S)
ereport(ERROR,
(errcode(ERRCODE_SUBSTRING_ERROR),
errmsg("negative substring length not allowed"),
errOmitLocation(true)));
/*
* A zero or negative value for the end position can happen if the
* start was negative or one. SQL99 says to return a zero-length
* string.
*/
if (E < 1)
PG_RETURN_BYTEA_P(PG_STR_GET_BYTEA(""));
L1 = E - S1;
}
/*
* If the start position is past the end of the string, SQL99 says to
* return a zero-length string -- PG_GETARG_TEXT_P_SLICE() will do that
* for us. Convert to zero-based starting position
*/
PG_RETURN_BYTEA_P(PG_GETARG_BYTEA_P_SLICE(0, S1 - 1, L1));
}
/*
* bytea_substr_no_len -
* Wrapper to avoid opr_sanity failure due to
* one function accepting a different number of args.
*/
Datum
bytea_substr_no_len(PG_FUNCTION_ARGS)
{
return bytea_substr(fcinfo);
}
/*
* byteapos -
* Return the position of the specified substring.
* Implements the SQL92 POSITION() function.
* Cloned from textpos and modified as required.
*/
Datum
byteapos(PG_FUNCTION_ARGS)
{
Datum d0 = PG_GETARG_DATUM(0);
char *p0; void *tofree0; int len0;
Datum d1 = PG_GETARG_DATUM(1);
char *p1; void *tofree1; int len1;
int pos;
int px, p;
varattrib_untoast_ptr_len(d0, &p0, &len0, &tofree0);
varattrib_untoast_ptr_len(d1, &p1, &len1, &tofree1);
/* empty pattern */
if(len1 == 0)
PG_RETURN_INT32(1);
pos = 0;
px = (len0 - len1);
for (p = 0; p <= px; p++)
{
if ((*p1 == *p0) && (memcmp(p0, p1, len1) == 0))
{
pos = p + 1;
break;
};
p0++;
};
if(tofree0)
pfree(tofree0);
if(tofree1)
pfree(tofree1);
PG_RETURN_INT32(pos);
}
/*-------------------------------------------------------------
* byteaGetByte
*
* this routine treats "bytea" as an array of bytes.
* It returns the Nth byte (a number between 0 and 255).
*-------------------------------------------------------------
*/
Datum
byteaGetByte(PG_FUNCTION_ARGS)
{
Datum d0 = PG_GETARG_DATUM(0);
char *p0; void *tofree0; int len0;
int32 n = PG_GETARG_INT32(1);
int32 result;
varattrib_untoast_ptr_len(d0, &p0, &len0, &tofree0);
if (n < 0 || n >= len0)
ereport(ERROR,
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
errmsg("index %d out of valid range, 0..%d",
n, len0 - 1),
errOmitLocation(true)));
result = (unsigned char) p0[n];
if(tofree0)
pfree(tofree0);
PG_RETURN_INT32(result);
}
/*-------------------------------------------------------------
* byteaGetBit
*
* This routine treats a "bytea" type like an array of bits.
* It returns the value of the Nth bit (0 or 1).
*
*-------------------------------------------------------------
*/
Datum
byteaGetBit(PG_FUNCTION_ARGS)
{
Datum d0 = PG_GETARG_DATUM(0);
char *p0; void *tofree0; int len0;
int32 n = PG_GETARG_INT32(1);
int byteNo, bitNo;
int result;
varattrib_untoast_ptr_len(d0, &p0, &len0, &tofree0);
if (n < 0 || n >= len0 * 8)
ereport(ERROR,
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
errmsg("index %d out of valid range, 0..%d",
n, len0 * 8 - 1),
errOmitLocation(true)));
byteNo = n / 8;
bitNo = n % 8;
result = (unsigned char) p0[byteNo];
if(tofree0)
pfree(tofree0);
if (result & (1 << bitNo))
PG_RETURN_INT32(1);
else
PG_RETURN_INT32(0);
}
/*-------------------------------------------------------------
* byteaSetByte
*
* Given an instance of type 'bytea' creates a new one with
* the Nth byte set to the given value.
*
*-------------------------------------------------------------
*/
Datum
byteaSetByte(PG_FUNCTION_ARGS)
{
bytea *v = PG_GETARG_BYTEA_P(0);
int32 n = PG_GETARG_INT32(1);
int32 newByte = PG_GETARG_INT32(2);
int len;
bytea *res;
len = VARSIZE(v) - VARHDRSZ;
if (n < 0 || n >= len)
ereport(ERROR,
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
errmsg("index %d out of valid range, 0..%d",
n, len - 1),
errOmitLocation(true)));
/*
* Make a copy of the original varlena.
*/
res = (bytea *) palloc(VARSIZE(v));
memcpy((char *) res, (char *) v, VARSIZE(v));
/*
* Now set the byte.
*/
((unsigned char *) VARDATA(res))[n] = newByte;
PG_RETURN_BYTEA_P(res);
}
/*-------------------------------------------------------------
* byteaSetBit
*
* Given an instance of type 'bytea' creates a new one with
* the Nth bit set to the given value.
*
*-------------------------------------------------------------
*/
Datum
byteaSetBit(PG_FUNCTION_ARGS)
{
bytea *v = PG_GETARG_BYTEA_P(0);
int32 n = PG_GETARG_INT32(1);
int32 newBit = PG_GETARG_INT32(2);
bytea *res;
int len;
int oldByte,
newByte;
int byteNo,
bitNo;
len = VARSIZE(v) - VARHDRSZ;
if (n < 0 || n >= len * 8)
ereport(ERROR,
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
errmsg("index %d out of valid range, 0..%d",
n, len * 8 - 1),
errOmitLocation(true)));
byteNo = n / 8;
bitNo = n % 8;
/*
* sanity check!
*/
if (newBit != 0 && newBit != 1)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("new bit must be 0 or 1"),
errOmitLocation(true)));
/*
* Make a copy of the original varlena.
*/
res = (bytea *) palloc(VARSIZE(v));
memcpy((char *) res, (char *) v, VARSIZE(v));
/*
* Update the byte.
*/
oldByte = ((unsigned char *) VARDATA(res))[byteNo];
if (newBit == 0)
newByte = oldByte & (~(1 << bitNo));
else
newByte = oldByte | (1 << bitNo);
((unsigned char *) VARDATA(res))[byteNo] = newByte;
PG_RETURN_BYTEA_P(res);
}
/* text_name()
* Converts a text type to a Name type.
*/
Datum
text_name(PG_FUNCTION_ARGS)
{
Datum d0 = PG_GETARG_DATUM(0);
char *p0; void *tofree0; int len0;
Name result;
varattrib_untoast_ptr_len(d0, &p0, &len0, &tofree0);
/* Truncate oversize input */
if (len0 >= NAMEDATALEN)
len0 = NAMEDATALEN - 1;
result = (Name) palloc(NAMEDATALEN);
memcpy(NameStr(*result), p0, len0);
/* now null pad to full length... */
while (len0 < NAMEDATALEN)
{
*(NameStr(*result) + len0) = '\0';
len0++;
}
if(tofree0)
pfree(tofree0);
PG_RETURN_NAME(result);
}
/* name_text()
* Converts a Name type to a text type.
*/
Datum
name_text(PG_FUNCTION_ARGS)
{
Name s = PG_GETARG_NAME(0);
PG_RETURN_TEXT_P(cstring_to_text(NameStr(*s)));
}
/*
* textToQualifiedNameList - convert a text object to list of names
*
* This implements the input parsing needed by nextval() and other
* functions that take a text parameter representing a qualified name.
* We split the name at dots, downcase if not double-quoted, and
* truncate names if they're too long.
*/
List *
textToQualifiedNameList(text *textval)
{
char *rawname;
List *result = NIL;
List *namelist;
ListCell *l;
/* Convert to C string (handles possible detoasting). */
/* Note we rely on being able to modify rawname below. */
rawname = text_to_cstring(textval);
if (!SplitIdentifierString(rawname, '.', &namelist))
ereport(ERROR,
(errcode(ERRCODE_INVALID_NAME),
errmsg("invalid name syntax"),
errOmitLocation(true)));
if (namelist == NIL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_NAME),
errmsg("invalid name syntax"),
errOmitLocation(true)));
foreach(l, namelist)
{
char *curname = (char *) lfirst(l);
result = lappend(result, makeString(pstrdup(curname)));
}
pfree(rawname);
list_free(namelist);
return result;
}
/*
* SplitIdentifierString --- parse a string containing identifiers
*
* This is the guts of textToQualifiedNameList, and is exported for use in
* other situations such as parsing GUC variables. In the GUC case, it's
* important to avoid memory leaks, so the API is designed to minimize the
* amount of stuff that needs to be allocated and freed.
*
* Inputs:
* rawstring: the input string; must be overwritable! On return, it's
* been modified to contain the separated identifiers.
* separator: the separator punctuation expected between identifiers
* (typically '.' or ','). Whitespace may also appear around
* identifiers.
* Outputs:
* namelist: filled with a palloc'd list of pointers to identifiers within
* rawstring. Caller should list_free() this even on error return.
*
* Returns TRUE if okay, FALSE if there is a syntax error in the string.
*
* Note that an empty string is considered okay here, though not in
* textToQualifiedNameList.
*/
bool
SplitIdentifierString(char *rawstring, char separator,
List **namelist)
{
char *nextp = rawstring;
bool done = false;
*namelist = NIL;
while (isspace((unsigned char) *nextp))
nextp++; /* skip leading whitespace */
if (*nextp == '\0')
return true; /* allow empty string */
/* At the top of the loop, we are at start of a new identifier. */
do
{
char *curname;
char *endp;
if (*nextp == '\"')
{
/* Quoted name --- collapse quote-quote pairs, no downcasing */
curname = nextp + 1;
for (;;)
{
endp = strchr(nextp + 1, '\"');
if (endp == NULL)
return false; /* mismatched quotes */
if (endp[1] != '\"')
break; /* found end of quoted name */
/* Collapse adjacent quotes into one quote, and look again */
memmove(endp, endp + 1, strlen(endp));
nextp = endp;
}
/* endp now points at the terminating quote */
nextp = endp + 1;
}
else
{
/* Unquoted name --- extends to separator or whitespace */
char *downname;
int len;
curname = nextp;
while (*nextp && *nextp != separator &&
!isspace((unsigned char) *nextp))
nextp++;
endp = nextp;
if (curname == nextp)
return false; /* empty unquoted name not allowed */
/*
* Downcase the identifier, using same code as main lexer does.
*
* XXX because we want to overwrite the input in-place, we cannot
* support a downcasing transformation that increases the string
* length. This is not a problem given the current implementation
* of downcase_truncate_identifier, but we'll probably have to do
* something about this someday.
*/
len = endp - curname;
downname = downcase_truncate_identifier(curname, len, false);
Assert(strlen(downname) <= len);
strncpy(curname, downname, len);
pfree(downname);
}
while (isspace((unsigned char) *nextp))
nextp++; /* skip trailing whitespace */
if (*nextp == separator)
{
nextp++;
while (isspace((unsigned char) *nextp))
nextp++; /* skip leading whitespace for next */
/* we expect another name, so done remains false */
}
else if (*nextp == '\0')
done = true;
else
return false; /* invalid syntax */
/* Now safe to overwrite separator with a null */
*endp = '\0';
/* Truncate name if it's overlength */
truncate_identifier(curname, strlen(curname), false);
/*
* Finished isolating current name --- add it to list
*/
*namelist = lappend(*namelist, curname);
/* Loop back if we didn't reach end of string */
} while (!done);
return true;
}
/*****************************************************************************
* Comparison Functions used for bytea
*
* Note: btree indexes need these routines not to leak memory; therefore,
* be careful to free working copies of toasted datums. Most places don't
* need to be so careful.
*****************************************************************************/
Datum
byteaeq(PG_FUNCTION_ARGS)
{
return generic_varlena_eq(fcinfo);
}
Datum
byteane(PG_FUNCTION_ARGS)
{
Datum d = generic_varlena_eq(fcinfo);
return (d==0 ? 1 : 0);
}
Datum
bytealt(PG_FUNCTION_ARGS)
{
int result = generic_varlena_cmp(fcinfo);
PG_RETURN_BOOL(result < 0);
}
Datum
byteale(PG_FUNCTION_ARGS)
{
int result = generic_varlena_cmp(fcinfo);
PG_RETURN_BOOL(result <= 0);
}
Datum
byteagt(PG_FUNCTION_ARGS)
{
int result = generic_varlena_cmp(fcinfo);
PG_RETURN_BOOL(result > 0);
}
Datum
byteage(PG_FUNCTION_ARGS)
{
int result = generic_varlena_cmp(fcinfo);
PG_RETURN_BOOL(result >= 0);
}
Datum
byteacmp(PG_FUNCTION_ARGS)
{
int result = generic_varlena_cmp(fcinfo);
PG_RETURN_INT32(result);
}
/*
* appendStringInfoText
*
* Append a text to str.
* Like appendStringInfoString(str, PG_TEXT_GET_STR(s)) but faster.
*/
static void
appendStringInfoText(StringInfo str, const text *t)
{
appendBinaryStringInfo(str, VARDATA_ANY((void *) t), VARSIZE_ANY_EXHDR((void *) t));
}
/*
* replace_text
* replace all occurrences of 'old_sub_str' in 'orig_str'
* with 'new_sub_str' to form 'new_str'
*
* returns 'orig_str' if 'old_sub_str' == '' or 'orig_str' == ''
* otherwise returns 'new_str'
*/
Datum
replace_text(PG_FUNCTION_ARGS)
{
Datum d0 = PG_GETARG_DATUM(0);
char *p0; void *tofree0; int len0;
Datum d1 = PG_GETARG_DATUM(1);
char *p1; void *tofree1; int len1;
Datum d2 = PG_GETARG_DATUM(2);
char *p2; void *tofree2; int len2;
int from_sub_text_len;
TextPositionState state =
{
0, /* use_wchar */
NULL, /* str1 */
NULL, /* str2 */
NULL, /* wstr1 */
NULL, /* wstr2 */
0, /* len1 */
0, /* len2 */
};
text *ret_text;
int start_posn;
int curr_posn;
int chunk_len;
char *start_ptr;
StringInfoData str;
varattrib_untoast_ptr_len(d0, &p0, &len0, &tofree0);
varattrib_untoast_ptr_len(d1, &p1, &len1, &tofree1);
varattrib_untoast_ptr_len(d2, &p2, &len2, &tofree2);
if(pg_database_encoding_max_length() == 1)
from_sub_text_len = len1;
else
from_sub_text_len = pg_mbstrlen_with_len(p1, len1);
if (len0 == 0 || from_sub_text_len == 0)
{
if(tofree0)
pfree(tofree0);
if(tofree1)
pfree(tofree1);
if(tofree2)
pfree(tofree2);
return d0;
}
text_position_setup_ptr_len(p0, len0, p1, len1, &state);
start_posn = 1;
curr_posn = text_position_next(1, &state);
/* When the from_sub_text is not found, there is nothing to do. */
if (curr_posn == 0)
{
if(tofree0)
pfree(tofree0);
if(tofree1)
pfree(tofree1);
if(tofree2)
pfree(tofree2);
text_position_cleanup(&state);
return d0;
}
/* start_ptr points to the start_posn'th character of src_text */
start_ptr = p0;
initStringInfo(&str);
do
{
/* copy the data skipped over by last text_position_next() */
chunk_len = charlen_to_bytelen(start_ptr, curr_posn - start_posn);
appendBinaryStringInfo(&str, start_ptr, chunk_len);
appendBinaryStringInfo(&str, p2, len2);
start_posn = curr_posn;
start_ptr += chunk_len;
start_posn += from_sub_text_len;
start_ptr += charlen_to_bytelen(start_ptr, from_sub_text_len);
curr_posn = text_position_next(start_posn, &state);
}
while (curr_posn > 0);
/* copy trailing data */
chunk_len = ((char *) p0 + len0) - start_ptr;
appendBinaryStringInfo(&str, start_ptr, chunk_len);
text_position_cleanup(&state);
ret_text = PG_STR_GET_TEXT(str.data);
pfree(str.data);
if(tofree0)
pfree(tofree0);
if(tofree1)
pfree(tofree1);
if(tofree2)
pfree(tofree2);
PG_RETURN_TEXT_P(ret_text);
}
/*
* check_replace_text_has_escape_char
*
* check whether replace_text contains escape char.
*/
static bool
check_replace_text_has_escape_char(const text *replace_text)
{
const char *p = VARDATA_ANY((void *) replace_text);
const char *p_end = p + VARSIZE_ANY((void *) replace_text);
if (pg_database_encoding_max_length() == 1)
{
for (; p < p_end; p++)
{
if (*p == '\\')
return true;
}
}
else
{
for (; p < p_end; p += pg_mblen(p))
{
if (*p == '\\')
return true;
}
}
return false;
}
/*
* appendStringInfoRegexpSubstr
*
* Append replace_text to str, substituting regexp back references for
* \n escapes. start_ptr is the start of the match in the source string,
* at logical character position data_pos.
*/
static void
appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
regmatch_t *pmatch,
char *start_ptr, int data_pos)
{
const char *p = VARDATA_ANY(replace_text);
const char *p_end = p + VARSIZE_ANY_EXHDR(replace_text);
int eml = pg_database_encoding_max_length();
for (;;)
{
const char *chunk_start = p;
int so;
int eo;
/* Find next escape char. */
if (eml == 1)
{
for (; p < p_end && *p != '\\'; p++)
/* nothing */ ;
}
else
{
for (; p < p_end && *p != '\\'; p += pg_mblen(p))
/* nothing */ ;
}
/* Copy the text we just scanned over, if any. */
if (p > chunk_start)
appendBinaryStringInfo(str, chunk_start, p - chunk_start);
/* Done if at end of string, else advance over escape char. */
if (p >= p_end)
break;
p++;
if (p >= p_end)
{
/* Escape at very end of input. Treat same as unexpected char */
appendStringInfoChar(str, '\\');
break;
}
if (*p >= '1' && *p <= '9')
{
/* Use the back reference of regexp. */
int idx = *p - '0';
so = pmatch[idx].rm_so;
eo = pmatch[idx].rm_eo;
p++;
}
else if (*p == '&')
{
/* Use the entire matched string. */
so = pmatch[0].rm_so;
eo = pmatch[0].rm_eo;
p++;
}
else if (*p == '\\')
{
/* \\ means transfer one \ to output. */
appendStringInfoChar(str, '\\');
p++;
continue;
}
else
{
/*
* If escape char is not followed by any expected char, just treat
* it as ordinary data to copy. (XXX would it be better to throw
* an error?)
*/
appendStringInfoChar(str, '\\');
continue;
}
if (so != -1 && eo != -1)
{
/*
* Copy the text that is back reference of regexp. Note so and eo
* are counted in characters not bytes.
*/
char *chunk_start;
int chunk_len;
Assert(so >= data_pos);
chunk_start = start_ptr;
chunk_start += charlen_to_bytelen(chunk_start, so - data_pos);
chunk_len = charlen_to_bytelen(chunk_start, eo - so);
appendBinaryStringInfo(str, chunk_start, chunk_len);
}
}
}
#define REGEXP_REPLACE_BACKREF_CNT 10
/*
* replace_text_regexp
*
* replace text that matches to regexp in src_text to replace_text.
*
* Note: to avoid having to include regex.h in builtins.h, we declare
* the regexp argument as void *, but really it's regex_t *.
*/
text *
replace_text_regexp(text *src_text, void *regexp,
text *replace_text, bool glob)
{
text *ret_text;
regex_t *re = (regex_t *) regexp;
int src_text_len = VARSIZE_ANY_EXHDR(src_text);
StringInfoData buf;
regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT];
pg_wchar *data;
size_t data_len;
int search_start;
int data_pos;
char *start_ptr;
bool have_escape;
initStringInfo(&buf);
/* Convert data string to wide characters. */
data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
data_len = pg_mb2wchar_with_len(VARDATA_ANY(src_text), data, src_text_len);
/* Check whether replace_text has escape char. */
have_escape = check_replace_text_has_escape_char(replace_text);
/* start_ptr points to the data_pos'th character of src_text */
start_ptr = (char *) VARDATA_ANY(src_text);
data_pos = 0;
search_start = 0;
while (search_start <= data_len)
{
int regexec_result;
CHECK_FOR_INTERRUPTS();
regexec_result = pg_regexec(re,
data,
data_len,
search_start,
NULL, /* no details */
REGEXP_REPLACE_BACKREF_CNT,
pmatch,
0);
if (regexec_result == REG_NOMATCH)
break;
if (regexec_result != REG_OKAY)
{
char errMsg[100];
pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
ereport(ERROR,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
errmsg("regular expression failed: %s", errMsg),
errOmitLocation(true)));
}
/*
* Copy the text to the left of the match position. Note we are given
* character not byte indexes.
*/
if (pmatch[0].rm_so - data_pos > 0)
{
int chunk_len;
chunk_len = charlen_to_bytelen(start_ptr,
pmatch[0].rm_so - data_pos);
appendBinaryStringInfo(&buf, start_ptr, chunk_len);
/*
* Advance start_ptr over that text, to avoid multiple rescans of
* it if the replace_text contains multiple back-references.
*/
start_ptr += chunk_len;
data_pos = pmatch[0].rm_so;
}
/*
* Copy the replace_text. Process back references when the
* replace_text has escape characters.
*/
if (have_escape)
appendStringInfoRegexpSubstr(&buf, replace_text, pmatch,
start_ptr, data_pos);
else
appendStringInfoText(&buf, replace_text);
/* Advance start_ptr and data_pos over the matched text. */
start_ptr += charlen_to_bytelen(start_ptr,
pmatch[0].rm_eo - data_pos);
data_pos = pmatch[0].rm_eo;
/*
* When global option is off, replace the first instance only.
*/
if (!glob)
break;
/*
* Search from next character when the matching text is zero width.
*/
search_start = data_pos;
if (pmatch[0].rm_so == pmatch[0].rm_eo)
search_start++;
}
/*
* Copy the text to the right of the last match.
*/
if (data_pos < data_len)
{
int chunk_len;
chunk_len = ((char *) src_text + VARSIZE_ANY(src_text)) - start_ptr;
appendBinaryStringInfo(&buf, start_ptr, chunk_len);
}
ret_text = cstring_to_text_with_len(buf.data, buf.len);
pfree(buf.data);
pfree(data);
return ret_text;
}
/*
* split_text
* parse input string
* return ord item (1 based)
* based on provided field separator
*/
Datum
split_text(PG_FUNCTION_ARGS)
{
Datum d0 = PG_GETARG_DATUM(0);
char *p0; void *tofree0; int len0;
Datum d1 = PG_GETARG_DATUM(1);
char *p1; void *tofree1; int len1;
int fldnum = PG_GETARG_INT32(2);
int inputstring_len;
int fldsep_len;
TextPositionState state =
{
0, /* use_wchar */
NULL, /* str1 */
NULL, /* str2 */
NULL, /* wstr1 */
NULL, /* wstr2 */
0, /* len1 */
0, /* len2 */
};
int start_posn;
int end_posn;
text *result_text;
varattrib_untoast_ptr_len(d0, &p0, &len0, &tofree0);
varattrib_untoast_ptr_len(d1, &p1, &len1, &tofree1);
if(pg_database_encoding_max_length() == 1)
{
inputstring_len = len0;
fldsep_len = len1;
}
else
{
inputstring_len = pg_mbstrlen_with_len(p0, len0);
fldsep_len = pg_mbstrlen_with_len(p1, len1);
}
/* field number is 1 based */
if (fldnum < 1)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("field position must be greater than zero"),
errOmitLocation(true)));
/* return empty string for empty input string */
if (inputstring_len < 1)
{
if(tofree0)
pfree(tofree0);
if(tofree1)
pfree(tofree1);
PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
}
/* empty field separator */
if (fldsep_len < 1)
{
if(tofree0)
pfree(tofree0);
if(tofree1)
pfree(tofree1);
/* if first field, return input string, else empty string */
if (fldnum == 1)
return d0;
else
PG_RETURN_TEXT_P(PG_STR_GET_TEXT(""));
}
text_position_setup_ptr_len(p0, len0, p1, len1, &state);
/* identify bounds of first field */
start_posn = 1;
end_posn = text_position_next(1, &state);
/* special case if fldsep not found at all */
if (end_posn == 0)
{
text_position_cleanup(&state);
if(tofree0)
pfree(tofree0);
if(tofree1)
pfree(tofree1);
/* if field 1 requested, return input string, else empty string */
if (fldnum == 1)
return d0;
else
PG_RETURN_TEXT_P(cstring_to_text(""));
}
while (end_posn > 0 && --fldnum > 0)
{
/* identify bounds of next field */
start_posn = end_posn + fldsep_len;
end_posn = text_position_next(start_posn, &state);
}
text_position_cleanup(&state);
if (fldnum > 0)
{
/* N'th field separator not found */
/* if last field requested, return it, else empty string */
if (fldnum == 1)
result_text = text_substring(d0,
start_posn,
-1,
true);
else
result_text = cstring_to_text("");
}
else
{
/* non-last field requested */
result_text = text_substring(d0,
start_posn,
end_posn - start_posn,
false);
}
if(tofree0)
pfree(tofree0);
if(tofree1)
pfree(tofree1);
PG_RETURN_TEXT_P(result_text);
}
/*
* text_to_array_impl
* Carries out the actual tokenization and array conversion of an input string.
*
* Parameters:
* string: Where to start in the input string
* stringByteLen: Length of current string
* delimiter: Which delimiter to use
* delimiterByteLen: Length of delimiter in bytes
* delimiterCharLen: Length of delimiter in chars
* arrayState: State of the output array where we accumulate results
* endOfString: Do we expect any more chunk of the main input string?
*
* Returns the pointer where the last match was found. Successively the
* caller can splice more data starting from this address to find further
* array elements.
*/
static char* text_to_array_impl(char *string, int stringByteLen, char *delimiter,
int delimiterByteLen, int delimiterCharLen, ArrayBuildState **arrayState, bool endOfString)
{
int start_posn = 1;
int fldnum = 1;
int end_posn = 0;
int chunk_len = 0;
text *result_text;
char* cur_ptr = string;
TextPositionState state =
{
0, /* use_wchar */
NULL, /* str1 */
NULL, /* str2 */
NULL, /* wstr1 */
NULL, /* wstr2 */
0, /* len1 */
0, /* len2 */
};
text_position_setup_ptr_len(string, stringByteLen, delimiter, delimiterByteLen, &state);
for (fldnum = 1;; fldnum++) /* field number is 1 based */
{
end_posn = text_position_next(start_posn, &state);
if (end_posn == 0 && !endOfString)
{
break;
}
else if (end_posn == 0)
{
/* fetch last field */
chunk_len = (string + stringByteLen) - cur_ptr;
}
else
{
/* fetch non-last field */
chunk_len = charlen_to_bytelen(cur_ptr, end_posn - start_posn);
}
/* must build a temp text datum to pass to accumArrayResult */
result_text = cstring_to_text_with_len(cur_ptr, chunk_len);
/* stash away this field */
*arrayState = accumArrayResult(*arrayState,
PointerGetDatum(result_text),
false,
TEXTOID,
CurrentMemoryContext);
pfree(result_text);
if (end_posn == 0)
{
/* Process next sub-string if any */
break;
}
start_posn = end_posn;
cur_ptr += chunk_len;
start_posn += delimiterCharLen;
cur_ptr += charlen_to_bytelen(cur_ptr, delimiterCharLen);
}
text_position_cleanup(&state);
return cur_ptr;
}
/*
* text_to_array_multi_pass
* Carries out the actual tokenization and array conversion of input string
* in multiple passes, where each pass is restricted to GPDB memory allocation limit.
*
* Parameters:
* string: The start of the input string
* stringByteLen: Length of current string
* delimiter: Which delimiter to use
* delimiterByteLen: Length of delimiter in bytes
* delimiterCharLen: Length of delimiter in chars
* endOfString: Do we expect any more chunk of the main input string?
*
* Returns the ArrayBuildState containing all the array elements.
*/
static ArrayBuildState* text_to_array_multi_pass(char *string, int stringByteLen, char *delimiter, int delimiterByteLen, int delimiterCharLen)
{
ArrayBuildState *astate = NULL;
/* Start with full string. If it is too big then we chunk it later */
char *start_ptr = string;
int curSubStringByteLen = stringByteLen;
bool endOfString = false;
/* More bytes to consider? */
while (!endOfString)
{
/*
* Give the rest of the string to the current pass; may be chunked if
* the rest still doesn't fit in the memory
*/
curSubStringByteLen = (string + stringByteLen) - start_ptr;
/* Will this MBCS become too big to fit in memory once converted to wchar? */
if (pg_database_encoding_max_length() > 1 && curSubStringByteLen > ((MAX_STRING_BYTES)/ sizeof(pg_wchar)))
{
int curSubStringCharLen = 0;
/* We need multi-pass. So find the sub-string boundary for the current pass */
find_memory_limited_substring(start_ptr, string + stringByteLen - start_ptr,
(MAX_STRING_BYTES) / sizeof(pg_wchar), &curSubStringByteLen, &curSubStringCharLen);
}
Insist(start_ptr + curSubStringByteLen <= string + stringByteLen);
endOfString = ((start_ptr + curSubStringByteLen) == (string + stringByteLen));
char *nextStartPtr = text_to_array_impl(start_ptr, curSubStringByteLen, delimiter, delimiterByteLen, delimiterCharLen, &astate, endOfString);
Insist(nextStartPtr >= start_ptr);
if (!endOfString && nextStartPtr == start_ptr)
{
elog(ERROR, "String size not supported.");
}
start_ptr = nextStartPtr;
}
return astate;
}
/*
* * text_to_array
* * parse input string
* * return text array of elements
* * based on provided field separator
* */
Datum
text_to_array(PG_FUNCTION_ARGS)
{
Datum stringDatum = PG_GETARG_DATUM(0);
char *string = NULL;
void *toFreeString = NULL;
int stringByteLen = 0;
Datum delimiterDatum = PG_GETARG_DATUM(1);
char *delimiter = NULL;
void *toFreeDelimiter = NULL;
int delimiterByteLen = 0;
int stringCharLen = 0;
int delimiterCharLen = 0;
varattrib_untoast_ptr_len(stringDatum, &string, &stringByteLen, &toFreeString);
varattrib_untoast_ptr_len(delimiterDatum, &delimiter, &delimiterByteLen, &toFreeDelimiter);
if(pg_database_encoding_max_length() == 1)
{
stringCharLen = stringByteLen;
delimiterCharLen = delimiterByteLen;
}
else
{
stringCharLen = pg_mbstrlen_with_len(string, stringByteLen);
delimiterCharLen = pg_mbstrlen_with_len(delimiter, delimiterByteLen);
}
/* return NULL for empty input string */
if (stringCharLen < 1)
{
if(toFreeString)
{
pfree(toFreeString);
}
if(toFreeDelimiter)
{
pfree(toFreeDelimiter);
}
PG_RETURN_NULL();
}
/*
* empty field separator return one element, 1D, array using the input
* string
*/
if (delimiterCharLen < 1)
{
if(toFreeString)
{
pfree(toFreeString);
}
if(toFreeDelimiter)
{
pfree(toFreeDelimiter);
}
PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID, stringDatum, 1));
}
ArrayBuildState *astate = text_to_array_multi_pass(string, stringByteLen, delimiter, delimiterByteLen, delimiterCharLen);
if(toFreeString)
{
pfree(toFreeString);
}
if(toFreeDelimiter)
{
pfree(toFreeDelimiter);
}
PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
}
/*
* array_to_text
* concatenate Cstring representation of input array elements
* using provided field separator
*/
Datum
array_to_text(PG_FUNCTION_ARGS)
{
ArrayType *v = PG_GETARG_ARRAYTYPE_P(0);
char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1));
int nitems,
*dims,
ndims;
Oid element_type;
int typlen;
bool typbyval;
char typalign;
StringInfoData buf;
bool printed = false;
char *p;
bits8 *bitmap;
int bitmask;
int i;
ArrayMetaState *my_extra;
ndims = ARR_NDIM(v);
dims = ARR_DIMS(v);
nitems = ArrayGetNItems(ndims, dims);
/* if there are no elements, return an empty string */
if (nitems == 0)
PG_RETURN_TEXT_P(cstring_to_text(""));
element_type = ARR_ELEMTYPE(v);
initStringInfo(&buf);
/*
* We arrange to look up info about element type, including its output
* conversion proc, only once per series of calls, assuming the element
* type doesn't change underneath us.
*/
my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
if (my_extra == NULL)
{
fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
sizeof(ArrayMetaState));
my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra;
my_extra->element_type = ~element_type;
}
if (my_extra->element_type != element_type)
{
/*
* Get info about element type, including its output conversion proc
*/
get_type_io_data(element_type, IOFunc_output,
&my_extra->typlen, &my_extra->typbyval,
&my_extra->typalign, &my_extra->typdelim,
&my_extra->typioparam, &my_extra->typiofunc);
fmgr_info_cxt(my_extra->typiofunc, &my_extra->proc,
fcinfo->flinfo->fn_mcxt);
my_extra->element_type = element_type;
}
typlen = my_extra->typlen;
typbyval = my_extra->typbyval;
typalign = my_extra->typalign;
p = ARR_DATA_PTR(v);
bitmap = ARR_NULLBITMAP(v);
bitmask = 1;
for (i = 0; i < nitems; i++)
{
Datum itemvalue;
char *value;
/* Get source element, checking for NULL */
if (bitmap && (*bitmap & bitmask) == 0)
{
/* we ignore nulls */
}
else
{
itemvalue = fetch_att(p, typbyval, typlen);
value = OutputFunctionCall(&my_extra->proc, itemvalue);
if (printed)
appendStringInfo(&buf, "%s%s", fldsep, value);
else
appendStringInfoString(&buf, value);
printed = true;
p = att_addlength_pointer(p, typlen, p);
p = (char *) att_align_nominal(p, typalign);
}
/* advance bitmap pointer if any */
if (bitmap)
{
bitmask <<= 1;
if (bitmask == 0x100)
{
bitmap++;
bitmask = 1;
}
}
}
PG_RETURN_TEXT_P(cstring_to_text_with_len(buf.data, buf.len));
}
#define HEXBASE 16
/*
* Convert a int32 to a string containing a base 16 (hex) representation of
* the number.
*/
Datum
to_hex32(PG_FUNCTION_ARGS)
{
uint32 value = (uint32) PG_GETARG_INT32(0);
char *ptr;
const char *digits = "0123456789abcdef";
char buf[32]; /* bigger than needed, but reasonable */
ptr = buf + sizeof(buf) - 1;
*ptr = '\0';
do
{
*--ptr = digits[value % HEXBASE];
value /= HEXBASE;
} while (ptr > buf && value);
PG_RETURN_TEXT_P(cstring_to_text(ptr));
}
/*
* Convert a int64 to a string containing a base 16 (hex) representation of
* the number.
*/
Datum
to_hex64(PG_FUNCTION_ARGS)
{
uint64 value = (uint64) PG_GETARG_INT64(0);
char *ptr;
const char *digits = "0123456789abcdef";
char buf[32]; /* bigger than needed, but reasonable */
ptr = buf + sizeof(buf) - 1;
*ptr = '\0';
do
{
*--ptr = digits[value % HEXBASE];
value /= HEXBASE;
} while (ptr > buf && value);
PG_RETURN_TEXT_P(cstring_to_text(ptr));
}
/*
* Create an md5 hash of a text string and return it as hex
*
* md5 produces a 16 byte (128 bit) hash; double it for hex
*/
#define MD5_HASH_LEN 32
Datum
md5_text(PG_FUNCTION_ARGS)
{
text *in_text = PG_GETARG_TEXT_PP(0);
size_t len;
char hexsum[MD5_HASH_LEN + 1];
/* Calculate the length of the buffer using varlena metadata */
len = VARSIZE_ANY_EXHDR(in_text);
/* get the hash result */
if (pg_md5_hash(VARDATA_ANY(in_text), len, hexsum) == false)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
/* convert to text and return it */
PG_RETURN_TEXT_P(cstring_to_text(hexsum));
}
/*
* Create an md5 hash of a bytea field and return it as a hex string:
* 16-byte md5 digest is represented in 32 hex characters.
*/
Datum
md5_bytea(PG_FUNCTION_ARGS)
{
bytea *in = PG_GETARG_BYTEA_PP(0);
size_t len;
char hexsum[MD5_HASH_LEN + 1];
len = VARSIZE_ANY_EXHDR(in);
if (pg_md5_hash(VARDATA_ANY(in), len, hexsum) == false)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
PG_RETURN_TEXT_P(cstring_to_text(hexsum));
}
/*
* Return the size of a datum, possibly compressed
*
* Works on any data type
*/
Datum
pg_column_size(PG_FUNCTION_ARGS)
{
Datum value = PG_GETARG_DATUM(0);
int32 result;
int typlen;
/* On first call, get the input type's typlen, and save at *fn_extra */
if (fcinfo->flinfo->fn_extra == NULL)
{
/* Lookup the datatype of the supplied argument */
Oid argtypeid = get_fn_expr_argtype(fcinfo->flinfo, 0);
typlen = get_typlen(argtypeid);
if (typlen == 0) /* should not happen */
elog(ERROR, "cache lookup failed for type %u", argtypeid);
fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
sizeof(int));
*((int *) fcinfo->flinfo->fn_extra) = typlen;
}
else
typlen = *((int *) fcinfo->flinfo->fn_extra);
if (typlen == -1)
{
/* varlena type, possibly toasted */
result = toast_datum_size(value);
}
else if (typlen == -2)
{
/* cstring */
result = strlen(DatumGetCString(value)) + 1;
}
else
{
/* ordinary fixed-width type */
result = typlen;
}
PG_RETURN_INT32(result);
}
/*
* string_agg - Concatenates values and returns string.
*
* Syntax: string_agg(value text, delimiter text = '') RETURNS text
*
* Note: Any NULL values are ignored. The first-call delimiter isn't
* actually used at all, and on subsequent calls the delimiter precedes
* the associated value.
*/
/* subroutine to initialize state */
static StringInfo
makeStringAggState(FunctionCallInfo fcinfo)
{
StringInfo state;
MemoryContext aggcontext;
MemoryContext oldcontext;
if (!(fcinfo->context && IsA(fcinfo->context, AggState)))
{
/* cannot be called directly because of internal-type argument */
elog(ERROR, "string_agg_transfn called in non-aggregate context");
}
aggcontext = ((AggState*)fcinfo->context)->aggcontext;
/*
* Create state in aggregate context. It'll stay there across subsequent
* calls.
*/
oldcontext = MemoryContextSwitchTo(aggcontext);
state = makeStringInfo();
MemoryContextSwitchTo(oldcontext);
return state;
}
Datum
string_agg_transfn(PG_FUNCTION_ARGS)
{
StringInfo state;
state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
/* Append the element unless null. */
if (!PG_ARGISNULL(1))
{
if (state == NULL)
state = makeStringAggState(fcinfo);
appendStringInfoText(state, PG_GETARG_TEXT_PP(1)); /* value */
}
/*
* The transition type for string_agg() is declared to be "internal",
* which is a pass-by-value type the same size as a pointer.
*/
PG_RETURN_POINTER(state);
}
Datum
string_agg_delim_transfn(PG_FUNCTION_ARGS)
{
StringInfo state;
state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
/* Append the value unless null. */
if (!PG_ARGISNULL(1))
{
/* On the first time through, we ignore the delimiter. */
if (state == NULL)
state = makeStringAggState(fcinfo);
else if (!PG_ARGISNULL(2))
appendStringInfoText(state, PG_GETARG_TEXT_PP(2)); /* delimiter */
appendStringInfoText(state, PG_GETARG_TEXT_PP(1)); /* value */
}
/*
* The transition type for string_agg() is declared to be "internal",
* which is a pass-by-value type the same size as a pointer.
*/
PG_RETURN_POINTER(state);
}
Datum
string_agg_finalfn(PG_FUNCTION_ARGS)
{
StringInfo state;
/* cannot be called directly because of internal-type argument */
if (!(fcinfo->context && IsA(fcinfo->context, AggState)))
{
/* cannot be called directly because of internal-type argument */
elog(ERROR, "string_agg_finalfn called in non-aggregate context");
}
state = PG_ARGISNULL(0) ? NULL : (StringInfo) PG_GETARG_POINTER(0);
if (state != NULL)
PG_RETURN_TEXT_P(cstring_to_text(state->data));
else
PG_RETURN_NULL();
}