src/bin/psql/stringutils.c - hawq - Git at Google

 /*
  * psql - the PostgreSQL interactive terminal
  *
  * Copyright (c) 2000-2010, PostgreSQL Global Development Group
  *
  * src/bin/psql/stringutils.c
  */
 #include "postgres_fe.h"

 #include <ctype.h>

 #include "common.h"
 #include "stringutils.h"


 /* Forward declaration: strip_quotes() is defined below strtokx(), which calls it. */
 static void strip_quotes(char *source, char quote, char escape, int encoding);


 /*
  * Terminate the token that ends just before 'p' and return the position
  * from which the next strtokx() call should resume scanning.
  *
  * If 'p' is already at the end of the string there is nothing to do.
  * Otherwise a null must be written at 'p'.  When *p is a whitespace
  * character it can simply be overwritten; when it is not (e.g. it is a
  * delimiter or the start of an adjacent token) the rest of the string is
  * first shifted right by one byte so that no data is lost.  The caller's
  * buffer must have room for that extra byte.
  */
 static char *
 strtokx_terminate_token(char *p, const char *whitespace)
 {
 	if (*p == '\0')
 		return p;				/* at end of string, so no extra work */

 	if (!strchr(whitespace, *p))
 		memmove(p + 1, p, strlen(p) + 1);
 	*p = '\0';

 	return p + 1;
 }

 /*
  * Replacement for strtok() (a.k.a. poor man's flex)
  *
  * Splits a string into tokens, returning one token per call, then NULL
  * when no more tokens exist in the given string.
  *
  * The calling convention is similar to that of strtok, but with more
  * frammishes.
  *
  * s -			string to parse, if NULL continue parsing the last string
  * whitespace - set of whitespace characters that separate tokens
  * delim -		set of non-whitespace separator characters (or NULL)
  * quote -		set of characters that can quote a token (NULL if none)
  * escape -		character that can quote quotes (0 if none)
  * e_strings -	if TRUE, treat E'...' syntax as a valid token
  * del_quotes - if TRUE, strip quotes from the returned token, else return
  *				it exactly as found in the string
  * encoding -	the active character-set encoding
  *
  * Characters in 'delim', if any, will be returned as single-character
  * tokens unless part of a quoted token.
  *
  * Double occurrences of the quoting character are always taken to represent
  * a single quote character in the data.  If escape isn't 0, then escape
  * followed by anything (except \0) is a data character too.
  *
  * The combination of e_strings and del_quotes both TRUE is not currently
  * handled.  This could be fixed but it's not needed anywhere at the moment.
  *
  * Note that the string s is _not_ overwritten in this implementation; the
  * function works on a private copy held in static variables.  Consequently
  * only one string can be tokenized at a time, and a returned token points
  * into that copy: it stops being valid once the copy is freed, which
  * happens on the next call with a non-NULL s and on the call that returns
  * NULL at end of string.
  *
  * While scanning a quoted token, multibyte characters are stepped over
  * using PQmblen(), but never beyond the terminating null, so a truncated
  * multibyte character at the end of the input cannot push the scan out of
  * the buffer.
  *
  * NB: it's okay to vary delim, quote, and escape from one call to the
  * next on a single source string, but changing whitespace is a bad idea
  * since you might lose data.
  */
 char *
 strtokx(const char *s,
 		const char *whitespace,
 		const char *delim,
 		const char *quote,
 		char escape,
 		bool e_strings,
 		bool del_quotes,
 		int encoding)
 {
 	static char *storage = NULL;/* store the local copy of the users string
 								 * here */
 	static char *string = NULL; /* pointer into storage where to continue on
 								 * next call */

 	size_t		offset;			/* strspn/strcspn results are size_t */
 	char	   *start;
 	char	   *p;

 	if (s)
 	{
 		free(storage);

 		/*
 		 * We may need extra space to insert delimiter nulls for adjacent
 		 * tokens.	2X the space is a gross overestimate, but it's unlikely
 		 * that this code will be used on huge strings anyway.
 		 *
 		 * NOTE(review): the result of pg_malloc is used unchecked;
 		 * presumably it never returns NULL -- confirm in common.c.
 		 */
 		storage = pg_malloc(2 * strlen(s) + 1);
 		strcpy(storage, s);
 		string = storage;
 	}

 	/* nothing being parsed (no string given yet, or previous one exhausted) */
 	if (!storage)
 		return NULL;

 	/* skip leading whitespace */
 	start = string + strspn(string, whitespace);

 	/* end of string reached? */
 	if (*start == '\0')
 	{
 		/* technically we don't need to free here, but we're nice */
 		free(storage);
 		storage = NULL;
 		string = NULL;
 		return NULL;
 	}

 	/* a delimiter character is returned as a one-character token */
 	if (delim && strchr(delim, *start))
 	{
 		string = strtokx_terminate_token(start + 1, whitespace);
 		return start;
 	}

 	/* check for E string */
 	p = start;
 	if (e_strings &&
 		(*p == 'E' || *p == 'e') &&
 		p[1] == '\'')
 	{
 		quote = "'";
 		escape = '\\';			/* if std strings before, not any more */
 		p++;
 	}

 	/* test if quoting character */
 	if (quote && strchr(quote, *p))
 	{
 		/* okay, we have a quoted token, now scan for the closer */
 		char		thisquote = *p++;

 		while (*p)
 		{
 			int			charlen;

 			if (*p == escape && p[1] != '\0')
 				p++;			/* process escaped anything */
 			else if (*p == thisquote && p[1] == thisquote)
 				p++;			/* process doubled quote */
 			else if (*p == thisquote)
 			{
 				p++;			/* skip trailing quote */
 				break;
 			}

 			/*
 			 * Step over one (possibly multibyte) character.  *p is known to
 			 * be non-null here, so consuming one byte is always safe; any
 			 * further bytes claimed by PQmblen are consumed only while they
 			 * are still inside the string.
 			 */
 			charlen = PQmblen(p, encoding);
 			do
 			{
 				p++;
 			} while (--charlen > 0 && *p != '\0');
 		}

 		string = strtokx_terminate_token(p, whitespace);

 		/* Clean up the token if caller wants that */
 		if (del_quotes)
 			strip_quotes(start, thisquote, escape, encoding);

 		return start;
 	}

 	/*
 	 * Otherwise no quoting character.	Scan till next whitespace, delimiter
 	 * or quote.  NB: at this point, *start is known not to be '\0',
 	 * whitespace, delim, or quote, so we will consume at least one character.
 	 */
 	offset = strcspn(start, whitespace);

 	if (delim)
 	{
 		size_t		delim_offset = strcspn(start, delim);

 		if (delim_offset < offset)
 			offset = delim_offset;
 	}

 	if (quote)
 	{
 		size_t		quote_offset = strcspn(start, quote);

 		if (quote_offset < offset)
 			offset = quote_offset;
 	}

 	string = strtokx_terminate_token(start + offset, whitespace);

 	return start;
 }


 /*
  * strip_quotes
  *
  * Remove quotes from the string at *source.  A leading 'quote' and a
  * 'quote' that is the last character of the string are removed; embedded
  * double occurrences of 'quote' are reduced to single occurrences; if
  * 'escape' is not 0 then 'escape' removes special significance of next
  * character (the escape character itself is dropped).
  *
  * Characters are copied as multibyte units whose length is obtained from
  * PQmblen() for the given 'encoding', but the copy never runs past the
  * terminating null, so a truncated multibyte character at the end of the
  * string cannot cause reads or writes beyond the buffer.
  *
  * Note that the source string is overwritten in-place.
  */
 static void
 strip_quotes(char *source, char quote, char escape, int encoding)
 {
 	char	   *src;
 	char	   *dst;

 	psql_assert(source);
 	psql_assert(quote);

 	src = dst = source;

 	if (*src && *src == quote)
 		src++;					/* skip leading quote */

 	while (*src)
 	{
 		char		c = *src;
 		int			charlen;

 		if (c == quote && src[1] == '\0')
 			break;				/* skip trailing quote */
 		else if (c == quote && src[1] == quote)
 			src++;				/* process doubled quote */
 		else if (c == escape && src[1] != '\0')
 			src++;				/* process escaped character */

 		/*
 		 * Copy one character.  *src is non-null at this point (each branch
 		 * above only advances onto a byte it has already checked), so the
 		 * first byte is always copied; continuation bytes are copied only
 		 * while they are still part of the string.
 		 */
 		charlen = PQmblen(src, encoding);
 		do
 		{
 			*dst++ = *src++;
 		} while (--charlen > 0 && *src != '\0');
 	}

 	*dst = '\0';
 }
	/*
	* psql - the PostgreSQL interactive terminal
	*
	* Copyright (c) 2000-2010, PostgreSQL Global Development Group
	*
	* src/bin/psql/stringutils.c
	*/
	#include "postgres_fe.h"

	#include <ctype.h>

	#include "common.h"
	#include "stringutils.h"


	/* Forward declaration: strip_quotes() is defined below strtokx(), which calls it. */
	static void strip_quotes(char *source, char quote, char escape, int encoding);


	/*
	* Replacement for strtok() (a.k.a. poor man's flex)
	*
	* Splits a string into tokens, returning one token per call, then NULL
	* when no more tokens exist in the given string.
	*
	* The calling convention is similar to that of strtok, but with more
	* frammishes.
	*
	* s - string to parse, if NULL continue parsing the last string
	* whitespace - set of whitespace characters that separate tokens
	* delim - set of non-whitespace separator characters (or NULL)
	* quote - set of characters that can quote a token (NULL if none)
	* escape - character that can quote quotes (0 if none)
	* e_strings - if TRUE, treat E'...' syntax as a valid token
	* del_quotes - if TRUE, strip quotes from the returned token, else return
	* it exactly as found in the string
	* encoding - the active character-set encoding
	*
	* Characters in 'delim', if any, will be returned as single-character
	* tokens unless part of a quoted token.
	*
	* Double occurrences of the quoting character are always taken to represent
	* a single quote character in the data. If escape isn't 0, then escape
	* followed by anything (except \0) is a data character too.
	*
	* The combination of e_strings and del_quotes both TRUE is not currently
	* handled. This could be fixed but it's not needed anywhere at the moment.
	*
	* Note that the string s is _not_ overwritten in this implementation.
	*
	* NB: it's okay to vary delim, quote, and escape from one call to the
	* next on a single source string, but changing whitespace is a bad idea
	* since you might lose data.
	*/
	char *
	strtokx(const char *s,
			const char *whitespace,
			const char *delim,
			const char *quote,
			char escape,
			bool e_strings,
			bool del_quotes,
			int encoding)
	{
		static char *storage = NULL;	/* store the local copy of the users
										 * string here */
		static char *string = NULL;		/* pointer into storage where to
										 * continue on next call */

		size_t		offset;			/* strspn/strcspn results are size_t */
		char	   *start;
		char	   *p;

		if (s)
		{
			/* a new string replaces whatever was being parsed before */
			free(storage);

			/*
			 * We may need extra space to insert delimiter nulls for adjacent
			 * tokens. 2X the space is a gross overestimate, but it's unlikely
			 * that this code will be used on huge strings anyway.
			 */
			storage = pg_malloc(2 * strlen(s) + 1);
			strcpy(storage, s);
			string = storage;
		}

		/* nothing being parsed (no string given yet, or it is exhausted) */
		if (!storage)
			return NULL;

		/* skip leading whitespace */
		offset = strspn(string, whitespace);
		start = &string[offset];

		/* end of string reached? */
		if (*start == '\0')
		{
			/* technically we don't need to free here, but we're nice */
			free(storage);
			storage = NULL;
			string = NULL;
			return NULL;
		}

		/* test if delimiter character */
		if (delim && strchr(delim, *start))
		{
			/*
			 * If not at end of string, we need to insert a null to terminate
			 * the returned token. We can just overwrite the next character if
			 * it happens to be in the whitespace set ... otherwise move over
			 * the rest of the string to make room. (This is why we allocated
			 * extra space above).
			 */
			p = start + 1;
			if (*p != '\0')
			{
				if (!strchr(whitespace, *p))
					memmove(p + 1, p, strlen(p) + 1);
				*p = '\0';
				string = p + 1;
			}
			else
			{
				/* at end of string, so no extra work */
				string = p;
			}

			return start;
		}

		/* check for E string */
		p = start;
		if (e_strings &&
			(*p == 'E' || *p == 'e') &&
			p[1] == '\'')
		{
			quote = "'";
			escape = '\\';		/* if std strings before, not any more */
			p++;
		}

		/* test if quoting character */
		if (quote && strchr(quote, *p))
		{
			/* okay, we have a quoted token, now scan for the closer */
			char		thisquote = *p++;

			while (*p)
			{
				int			charlen;

				if (*p == escape && p[1] != '\0')
					p++;		/* process escaped anything */
				else if (*p == thisquote && p[1] == thisquote)
					p++;		/* process doubled quote */
				else if (*p == thisquote)
				{
					p++;		/* skip trailing quote */
					break;
				}

				/*
				 * Step over one (possibly multibyte) character. *p is
				 * non-null here, so one byte is always consumed; further
				 * bytes claimed by PQmblen are consumed only while they are
				 * still inside the string, so the scan cannot jump over the
				 * terminating null.
				 */
				charlen = PQmblen(p, encoding);
				do
				{
					p++;
				} while (--charlen > 0 && *p != '\0');
			}

			/*
			 * If not at end of string, we need to insert a null to terminate
			 * the returned token. See notes above.
			 */
			if (*p != '\0')
			{
				if (!strchr(whitespace, *p))
					memmove(p + 1, p, strlen(p) + 1);
				*p = '\0';
				string = p + 1;
			}
			else
			{
				/* at end of string, so no extra work */
				string = p;
			}

			/* Clean up the token if caller wants that */
			if (del_quotes)
				strip_quotes(start, thisquote, escape, encoding);

			return start;
		}

		/*
		 * Otherwise no quoting character. Scan till next whitespace,
		 * delimiter or quote. NB: at this point, *start is known not to be
		 * '\0', whitespace, delim, or quote, so we will consume at least one
		 * character.
		 */
		offset = strcspn(start, whitespace);

		if (delim)
		{
			size_t		offset2 = strcspn(start, delim);

			if (offset > offset2)
				offset = offset2;
		}

		if (quote)
		{
			size_t		offset2 = strcspn(start, quote);

			if (offset > offset2)
				offset = offset2;
		}

		p = start + offset;

		/*
		 * If not at end of string, we need to insert a null to terminate the
		 * returned token. See notes above.
		 */
		if (*p != '\0')
		{
			if (!strchr(whitespace, *p))
				memmove(p + 1, p, strlen(p) + 1);
			*p = '\0';
			string = p + 1;
		}
		else
		{
			/* at end of string, so no extra work */
			string = p;
		}

		return start;
	}


	/*
	* strip_quotes
	*
	* Remove quotes from the string at *source. Leading and trailing occurrences
	* of 'quote' are removed; embedded double occurrences of 'quote' are reduced
	* to single occurrences; if 'escape' is not 0 then 'escape' removes special
	* significance of next character.
	*
	* Note that the source string is overwritten in-place.
	*/
	static void
	strip_quotes(char *source, char quote, char escape, int encoding)
	{
		char	   *src;		/* read position in the original text */
		char	   *dst;		/* write position; never ahead of src */

		psql_assert(source);
		psql_assert(quote);

		src = dst = source;

		if (*src && *src == quote)
			src++;				/* skip leading quote */

		while (*src)
		{
			char		c = *src;
			int			charlen;

			if (c == quote && src[1] == '\0')
				break;			/* skip trailing quote */
			else if (c == quote && src[1] == quote)
				src++;			/* process doubled quote */
			else if (c == escape && src[1] != '\0')
				src++;			/* process escaped character */

			/*
			 * Copy one (possibly multibyte) character. *src is non-null
			 * here, so the first byte is always copied; continuation bytes
			 * reported by PQmblen are copied only while they are still part
			 * of the string, so the copy cannot run past the terminating
			 * null.
			 */
			charlen = PQmblen(src, encoding);
			do
			{
				*dst++ = *src++;
			} while (--charlen > 0 && *src != '\0');
		}

		*dst = '\0';
	}