| /*------------------------------------------------------------------------- |
| * |
| * encode.c |
| * Various data encoding/decoding things. |
| * |
| * Copyright (c) 2001-2009, PostgreSQL Global Development Group |
| * |
| * |
| * IDENTIFICATION |
| * $PostgreSQL: pgsql/src/backend/utils/adt/encode.c,v 1.23.2.1 2009/08/30 16:53:37 tgl Exp $ |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| |
| #include <ctype.h> |
| |
| #include "utils/builtins.h" |
| |
| |
/*
 * Set of callbacks implementing one binary <-> text encoding.
 *
 * encode_len / decode_len report how many output bytes the caller must
 * make room for, given the input; encode / decode then fill "dst" and
 * return the number of bytes they actually wrote.  The callers in this
 * file treat a written count larger than the reported length as a fatal
 * error, so the *_len callbacks must never under-estimate.
 */
struct pg_encoding
{
	unsigned	(*encode_len) (const char *src, unsigned srclen);
	unsigned	(*decode_len) (const char *src, unsigned srclen);
	unsigned	(*encode) (const char *src, unsigned srclen, char *dst);
	unsigned	(*decode) (const char *src, unsigned srclen, char *dst);
};

/* Case-insensitive lookup by encoding name; returns NULL if unknown. */
static const struct pg_encoding *pg_find_encoding(const char *name);
| |
| /* |
| * SQL functions. |
| */ |
| |
| Datum |
| binary_encode(PG_FUNCTION_ARGS) |
| { |
| bytea *data = PG_GETARG_BYTEA_P(0); |
| Datum name = PG_GETARG_DATUM(1); |
| text *result; |
| char *namebuf; |
| int datalen, |
| resultlen, |
| res; |
| const struct pg_encoding *enc; |
| |
| datalen = VARSIZE(data) - VARHDRSZ; |
| |
| namebuf = TextDatumGetCString(name); |
| |
| enc = pg_find_encoding(namebuf); |
| if (enc == NULL) |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("unrecognized encoding: \"%s\"", namebuf))); |
| |
| resultlen = enc->encode_len(VARDATA(data), datalen); |
| result = palloc(VARHDRSZ + resultlen); |
| |
| res = enc->encode(VARDATA(data), datalen, VARDATA(result)); |
| |
| /* Make this FATAL 'cause we've trodden on memory ... */ |
| if (res > resultlen) |
| elog(FATAL, "overflow - encode estimate too small"); |
| |
| SET_VARSIZE(result, VARHDRSZ + res); |
| |
| PG_RETURN_TEXT_P(result); |
| } |
| |
| Datum |
| binary_decode(PG_FUNCTION_ARGS) |
| { |
| text *data = PG_GETARG_TEXT_P(0); |
| Datum name = PG_GETARG_DATUM(1); |
| bytea *result; |
| char *namebuf; |
| int datalen, |
| resultlen, |
| res; |
| const struct pg_encoding *enc; |
| |
| datalen = VARSIZE(data) - VARHDRSZ; |
| |
| namebuf = TextDatumGetCString(name); |
| |
| enc = pg_find_encoding(namebuf); |
| if (enc == NULL) |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("unrecognized encoding: \"%s\"", namebuf))); |
| |
| resultlen = enc->decode_len(VARDATA(data), datalen); |
| result = palloc(VARHDRSZ + resultlen); |
| |
| res = enc->decode(VARDATA(data), datalen, VARDATA(result)); |
| |
| /* Make this FATAL 'cause we've trodden on memory ... */ |
| if (res > resultlen) |
| elog(FATAL, "overflow - decode estimate too small"); |
| |
| SET_VARSIZE(result, VARHDRSZ + res); |
| |
| PG_RETURN_BYTEA_P(result); |
| } |
| |
| |
| /* |
| * HEX |
| */ |
| |
| static const char *hextbl = "0123456789abcdef"; |
| |
| static const int8 hexlookup[128] = { |
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, |
| -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| }; |
| |
| static unsigned |
| hex_encode(const char *src, unsigned len, char *dst) |
| { |
| const char *end = src + len; |
| |
| while (src < end) |
| { |
| *dst++ = hextbl[(*src >> 4) & 0xF]; |
| *dst++ = hextbl[*src & 0xF]; |
| src++; |
| } |
| return len * 2; |
| } |
| |
| static char |
| get_hex(char c) |
| { |
| int res = -1; |
| |
| if (c > 0 && c < 127) |
| res = hexlookup[(unsigned char) c]; |
| |
| if (res < 0) |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("invalid hexadecimal digit: \"%c\"", c))); |
| |
| return (char) res; |
| } |
| |
| static unsigned |
| hex_decode(const char *src, unsigned len, char *dst) |
| { |
| const char *s, |
| *srcend; |
| char v1, |
| v2, |
| *p; |
| |
| srcend = src + len; |
| s = src; |
| p = dst; |
| while (s < srcend) |
| { |
| if (*s == ' ' || *s == '\n' || *s == '\t' || *s == '\r') |
| { |
| s++; |
| continue; |
| } |
| v1 = get_hex(*s++) << 4; |
| if (s >= srcend) |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("invalid hexadecimal data: odd number of digits"))); |
| |
| v2 = get_hex(*s++); |
| *p++ = v1 | v2; |
| } |
| |
| return p - dst; |
| } |
| |
/*
 * Output size for hex-encoding "srclen" bytes: two digits per byte.
 * The data itself is not examined.
 */
static unsigned
hex_enc_len(const char *src, unsigned srclen)
{
	return srclen * 2;
}
| |
/*
 * Upper bound on the output size for hex-decoding "srclen" characters:
 * one byte per two input characters, rounded down.  The data itself is
 * not examined.
 */
static unsigned
hex_dec_len(const char *src, unsigned srclen)
{
	return srclen / 2;
}
| |
| /* |
| * BASE64 |
| */ |
| |
| static const char _base64[] = |
| "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; |
| |
| static const int8 b64lookup[128] = { |
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, |
| -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, |
| 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, |
| -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, |
| 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, |
| -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, |
| 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1, |
| }; |
| |
| static unsigned |
| b64_encode(const char *src, unsigned len, char *dst) |
| { |
| char *p, |
| *lend = dst + 76; |
| const char *s, |
| *end = src + len; |
| int pos = 2; |
| uint32 buf = 0; |
| |
| s = src; |
| p = dst; |
| |
| while (s < end) |
| { |
| buf |= (unsigned char) *s << (pos << 3); |
| pos--; |
| s++; |
| |
| /* write it out */ |
| if (pos < 0) |
| { |
| *p++ = _base64[(buf >> 18) & 0x3f]; |
| *p++ = _base64[(buf >> 12) & 0x3f]; |
| *p++ = _base64[(buf >> 6) & 0x3f]; |
| *p++ = _base64[buf & 0x3f]; |
| |
| pos = 2; |
| buf = 0; |
| } |
| if (p >= lend) |
| { |
| *p++ = '\n'; |
| lend = p + 76; |
| } |
| } |
| if (pos != 2) |
| { |
| *p++ = _base64[(buf >> 18) & 0x3f]; |
| *p++ = _base64[(buf >> 12) & 0x3f]; |
| *p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '='; |
| *p++ = '='; |
| } |
| |
| return p - dst; |
| } |
| |
| static unsigned |
| b64_decode(const char *src, unsigned len, char *dst) |
| { |
| const char *srcend = src + len, |
| *s = src; |
| char *p = dst; |
| char c; |
| int b = 0; |
| uint32 buf = 0; |
| int pos = 0, |
| end = 0; |
| |
| while (s < srcend) |
| { |
| c = *s++; |
| |
| if (c == ' ' || c == '\t' || c == '\n' || c == '\r') |
| continue; |
| |
| if (c == '=') |
| { |
| /* end sequence */ |
| if (!end) |
| { |
| if (pos == 2) |
| end = 1; |
| else if (pos == 3) |
| end = 2; |
| else |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("unexpected \"=\""))); |
| } |
| b = 0; |
| } |
| else |
| { |
| b = -1; |
| if (c > 0 && c < 127) |
| b = b64lookup[(unsigned char) c]; |
| if (b < 0) |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("invalid symbol"))); |
| } |
| /* add it to buffer */ |
| buf = (buf << 6) + b; |
| pos++; |
| if (pos == 4) |
| { |
| *p++ = (buf >> 16) & 255; |
| if (end == 0 || end > 1) |
| *p++ = (buf >> 8) & 255; |
| if (end == 0 || end > 2) |
| *p++ = buf & 255; |
| buf = 0; |
| pos = 0; |
| } |
| } |
| |
| if (pos != 0) |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_PARAMETER_VALUE), |
| errmsg("invalid end sequence"))); |
| |
| return p - dst; |
| } |
| |
| |
/*
 * Output size for base64-encoding "srclen" bytes.  The data itself is not
 * examined.
 *
 * Every group of up to 3 input bytes becomes 4 output characters, and one
 * linefeed is added per 76 output characters, i.e. per 57 input bytes.
 *
 * Divide before multiplying: "(srclen + 2) * 4" wraps around in 32-bit
 * unsigned arithmetic once srclen reaches about 1GB, which would make the
 * result far too small for the caller's buffer.  Rounding the group count
 * up first keeps every intermediate value no larger than the final result.
 */
static unsigned
b64_enc_len(const char *src, unsigned srclen)
{
	unsigned	groups = (srclen + 2) / 3;
	unsigned	linefeeds = srclen / (76 * 3 / 4);

	return groups * 4 + linefeeds;
}
| |
/*
 * Upper bound on the output size for base64-decoding "srclen" characters:
 * three bytes per four input characters, rounded down.  The data itself
 * is not examined.
 */
static unsigned
b64_dec_len(const char *src, unsigned srclen)
{
	return (srclen * 3) / 4;
}
| |
| /* |
| * Escape |
| * Minimally escape bytea to text. |
| * De-escape text to bytea. |
| * |
| * We must escape zero bytes and high-bit-set bytes to avoid generating |
| * text that might be invalid in the current encoding, or that might |
| * change to something else if passed through an encoding conversion |
| * (leading to failing to de-escape to the original bytea value). |
| * Also of course backslash itself has to be escaped. |
| * |
| * De-escaping processes \\ and any \### octal |
| */ |
| |
/* Digit character -> numeric value, and numeric value -> digit character. */
#define VAL(ch)		((ch) - '0')
#define DIG(val)	((val) + '0')
| |
/*
 * Escape-encode src[0..srclen) into "dst".
 *
 * Zero bytes and bytes with the high bit set are written as a backslash
 * followed by three octal digits, a backslash is doubled, and every other
 * byte is copied unchanged.
 * Returns the number of bytes written; no terminator is added.
 */
static unsigned
esc_encode(const char *src, unsigned srclen, char *dst)
{
	const unsigned char *in = (const unsigned char *) src;
	const unsigned char *stop = in + srclen;
	char	   *out = dst;
	int			written = 0;

	for (; in < stop; in++)
	{
		unsigned char byte = *in;
		int			n;

		if (byte == '\\')
		{
			out[0] = '\\';
			out[1] = '\\';
			n = 2;
		}
		else if (byte == '\0' || IS_HIGHBIT_SET(byte))
		{
			out[0] = '\\';
			out[1] = DIG(byte >> 6);
			out[2] = DIG((byte >> 3) & 7);
			out[3] = DIG(byte & 7);
			n = 4;
		}
		else
		{
			out[0] = byte;
			n = 1;
		}

		out += n;
		written += n;
	}

	return written;
}
| |
| static unsigned |
| esc_decode(const char *src, unsigned srclen, char *dst) |
| { |
| const char *end = src + srclen; |
| char *rp = dst; |
| int len = 0; |
| |
| while (src < end) |
| { |
| if (src[0] != '\\') |
| *rp++ = *src++; |
| else if (src + 3 < end && |
| (src[1] >= '0' && src[1] <= '3') && |
| (src[2] >= '0' && src[2] <= '7') && |
| (src[3] >= '0' && src[3] <= '7')) |
| { |
| int val; |
| |
| val = VAL(src[1]); |
| val <<= 3; |
| val += VAL(src[2]); |
| val <<= 3; |
| *rp++ = val + VAL(src[3]); |
| src += 4; |
| } |
| else if (src + 1 < end && |
| (src[1] == '\\')) |
| { |
| *rp++ = '\\'; |
| src += 2; |
| } |
| else |
| { |
| /* |
| * One backslash, not followed by ### valid octal. Should never |
| * get here, since esc_dec_len does same check. |
| */ |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
| errmsg("invalid input syntax for type bytea"))); |
| } |
| |
| len++; |
| } |
| |
| return len; |
| } |
| |
/*
 * Output size for escape-encoding src[0..srclen).
 *
 * Must classify bytes exactly as esc_encode does: 4 output bytes for a
 * zero or high-bit-set byte, 2 for a backslash, 1 for anything else.
 *
 * The total can be as large as 4 * srclen, so it is accumulated in the
 * (unsigned) return type: a signed int counter would overflow, which is
 * undefined behavior, once the total exceeds INT_MAX.
 */
static unsigned
esc_enc_len(const char *src, unsigned srclen)
{
	const char *end = src + srclen;
	unsigned	len = 0;

	for (; src < end; src++)
	{
		if (*src == '\0' || IS_HIGHBIT_SET(*src))
			len += 4;
		else if (*src == '\\')
			len += 2;
		else
			len++;
	}

	return len;
}
| |
| static unsigned |
| esc_dec_len(const char *src, unsigned srclen) |
| { |
| const char *end = src + srclen; |
| int len = 0; |
| |
| while (src < end) |
| { |
| if (src[0] != '\\') |
| src++; |
| else if (src + 3 < end && |
| (src[1] >= '0' && src[1] <= '3') && |
| (src[2] >= '0' && src[2] <= '7') && |
| (src[3] >= '0' && src[3] <= '7')) |
| { |
| /* |
| * backslash + valid octal |
| */ |
| src += 4; |
| } |
| else if (src + 1 < end && |
| (src[1] == '\\')) |
| { |
| /* |
| * two backslashes = backslash |
| */ |
| src += 2; |
| } |
| else |
| { |
| /* |
| * one backslash, not followed by ### valid octal |
| */ |
| ereport(ERROR, |
| (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), |
| errmsg("invalid input syntax for type bytea"))); |
| } |
| |
| len++; |
| } |
| return len; |
| } |
| |
| /* |
| * Common |
| */ |
| |
| static const struct |
| { |
| const char *name; |
| struct pg_encoding enc; |
| } enclist[] = |
| |
| { |
| { |
| "hex", |
| { |
| hex_enc_len, hex_dec_len, hex_encode, hex_decode |
| } |
| }, |
| { |
| "base64", |
| { |
| b64_enc_len, b64_dec_len, b64_encode, b64_decode |
| } |
| }, |
| { |
| "escape", |
| { |
| esc_enc_len, esc_dec_len, esc_encode, esc_decode |
| } |
| }, |
| { |
| NULL, |
| { |
| NULL, NULL, NULL, NULL |
| } |
| } |
| }; |
| |
| static const struct pg_encoding * |
| pg_find_encoding(const char *name) |
| { |
| int i; |
| |
| for (i = 0; enclist[i].name; i++) |
| if (pg_strcasecmp(enclist[i].name, name) == 0) |
| return &enclist[i].enc; |
| |
| return NULL; |
| } |