| /*------------------------------------------------------------------------- |
| * |
| * scansup.c |
| * support routines for the lex/flex scanner, used by both the normal |
| * backend as well as the bootstrap backend |
| * |
| * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group |
| * Portions Copyright (c) 1994, Regents of the University of California |
| * |
| * |
| * IDENTIFICATION |
| * $PostgreSQL: pgsql/src/backend/parser/scansup.c,v 1.37 2009/01/01 17:23:46 momjian Exp $ |
| * |
| *------------------------------------------------------------------------- |
| */ |
| #include "postgres.h" |
| |
| #include <ctype.h> |
| |
| #include "parser/scansup.h" |
| #include "mb/pg_wchar.h" |
| |
| |
/* ----------------
 *		scanstr
 *
 * Produce a copy of the input string in which escape sequences have been
 * replaced by the characters they stand for:
 *
 *	''			a doubled single quote yields the character after the first
 *	\b \f \n \r \t	the usual C control characters
 *	\ooo		one to three octal digits give the character code
 *	\x			any other escaped character stands for itself
 *
 * A NULL or empty input yields an empty string.
 *
 * The result is palloc'd and should eventually be pfree'd by the caller.
 * ----------------
 */

char *
scanstr(const char *s)
{
    char       *unescaped;
    int         srclen;
    int         in = 0;         /* next input position to read */
    int         out = 0;        /* next output position to write */

    if (s == NULL || s[0] == '\0')
        return pstrdup("");

    srclen = strlen(s);

    /* Every output character consumes at least one input character. */
    unescaped = palloc(srclen + 1);

    while (in < srclen)
    {
        char        c = s[in++];

        if (c == '\'')
        {
            /*
             * If the scanner is working right, unescaped quotes only appear
             * in pairs, so there should be another character; take it as is.
             */
            c = s[in++];
        }
        else if (c == '\\')
        {
            /* Fetch the character that follows the backslash. */
            c = s[in++];

            if (c >= '0' && c <= '7')
            {
                /* Octal escape: this digit plus at most two more. */
                long        octal = c - '0';
                int         ndigits = 1;

                while (ndigits < 3 && s[in] >= '0' && s[in] <= '7')
                {
                    octal = (octal << 3) + (s[in] - '0');
                    in++;
                    ndigits++;
                }
                c = (char) octal;
            }
            else
            {
                switch (c)
                {
                    case 'b':
                        c = '\b';
                        break;
                    case 'f':
                        c = '\f';
                        break;
                    case 'n':
                        c = '\n';
                        break;
                    case 'r':
                        c = '\r';
                        break;
                    case 't':
                        c = '\t';
                        break;
                    default:
                        /* any other escaped character stands for itself */
                        break;
                }
            }
        }

        unescaped[out++] = c;
    }

    unescaped[out] = '\0';
    return unescaped;
}
| |
| |
| /* |
| * downcase_truncate_identifier() --- do appropriate downcasing and |
| * truncation of an unquoted identifier. Optionally warn of truncation. |
| * |
| * Returns a palloc'd string containing the adjusted identifier. |
| * |
| * Note: in some usages the passed string is not null-terminated. |
| * |
| * Note: the API of this function is designed to allow for downcasing |
| * transformations that increase the string length, but we don't yet |
| * support that. If you want to implement it, you'll need to fix |
| * SplitIdentifierString() in utils/adt/varlena.c. |
| */ |
| char * |
| downcase_truncate_identifier(const char *ident, int len, bool warn) |
| { |
| char *result; |
| int i; |
| |
| result = palloc(len + 1); |
| |
| /* |
| * SQL99 specifies Unicode-aware case normalization, which we don't yet |
| * have the infrastructure for. Instead we use tolower() to provide a |
| * locale-aware translation. However, there are some locales where this |
| * is not right either (eg, Turkish may do strange things with 'i' and |
| * 'I'). Our current compromise is to use tolower() for characters with |
| * the high bit set, and use an ASCII-only downcasing for 7-bit |
| * characters. |
| */ |
| for (i = 0; i < len; i++) |
| { |
| unsigned char ch = (unsigned char) ident[i]; |
| |
| if (ch >= 'A' && ch <= 'Z') |
| ch += 'a' - 'A'; |
| else if (IS_HIGHBIT_SET(ch) && isupper(ch)) |
| ch = tolower(ch); |
| result[i] = (char) ch; |
| } |
| result[i] = '\0'; |
| |
| if (i >= NAMEDATALEN) |
| truncate_identifier(result, i, warn); |
| |
| return result; |
| } |
| |
| /* |
| * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes. |
| * |
| * The given string is modified in-place, if necessary. A warning is |
| * issued if requested. |
| * |
| * We require the caller to pass in the string length since this saves a |
| * strlen() call in some common usages. |
| */ |
| void |
| truncate_identifier(char *ident, int len, bool warn) |
| { |
| if (len >= NAMEDATALEN) |
| { |
| len = pg_mbcliplen(ident, len, NAMEDATALEN - 1); |
| if (warn) |
| ereport(NOTICE, |
| (errcode(ERRCODE_NAME_TOO_LONG), |
| errmsg("identifier \"%s\" will be truncated to \"%.*s\"", |
| ident, len, ident))); |
| ident[len] = '\0'; |
| } |
| } |
| |
/*
 * scanner_isspace() --- report whether a character is one of the lexer's
 * whitespace characters.
 *
 * Returns true for space, tab, newline, carriage return and form feed, and
 * false for everything else.  Use this rather than isspace(), whose answer
 * may depend on the locale, whenever the lexer's behavior must be matched.
 *
 * In principle similar functions might be needed for isalnum etc, but for
 * the moment only isspace seems needed.
 */
bool
scanner_isspace(char ch)
{
    /*
     * This list must match scan.l's list of {space} characters, and
     * plpgsql's scan.l as well.
     */
    switch (ch)
    {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
        case '\f':
            return true;
        default:
            return false;
    }
}