/* src/guththila/Token.hpp - axis-axis1-c - Git at Google */

 #ifndef TOKEN_HPP
 #define TOKEN_HPP

 struct token_t
 {
   int type;
   char *start, *end;

   enum
     {
       Unknown, Name, AttValue, Prefix, CharData
     };

   void
   relocate (int offset)
   {
     start -= offset;
     end -= offset;
   }

   int
   isOfType (int type)
   {
     return this->type == type;

   }

   int
   length ()
   {
     if (end)
       return end - start + 1;
     else
       throw new XmlPullParserException ();
   }


 #ifndef UNICODE_OUT /* To Obtain UTF-8 Output */

   /*!added two toString methods, Because according to output format
    * converstion function will be vary
    * Here memcpy was used instead of strcpy
    * strcpy can't survive with UTF-16 string because , it ( UTF-16
    * string) has null characters middle of string  */

   char*
   toString (unsigned int unicodeState)
   {
     int len = length ();
     char *p = (char *)malloc (len + 1);
     memcpy (p, start, len);
     p[len] = 0;
     unicodeLength = len;
     switch (unicodeState)
       {
       case None:
 	return p;
 	break;
       case LE:
       case BE:
 	return (char *)convertUtf16toUtf8 (p, unicodeLength);
 	break;
       };
   }
 #endif


 #ifdef UNICODE_OUT /*! To Obtain UTF-16 Output  */


   char*
   toString (unsigned int unicodeState)
   {
     int len = length ();
     char *p = (char *)malloc (len + 1);
     memcpy (p, start, len);
     p[len] = 0;
     unicodeLength = len;
     switch (unicodeState)
       {
       case None:
 	return (char *)convertUtf8toUtf16 (p, unicodeLength);
 	break;
       case LE:
       case BE:
 	return p;
 	break;
       };
   }
 #endif


   /*! To Determine length of Given UTF-8 character  */

   UTF8_char
   lengthUtf8Char (char nextChar)
   {
     UTF8_char length;

     if ((nextChar & 0x80) == 0x00)
       length = 1;
     else if ((nextChar & 0xe0) == 0xc0)
       length = 2;
     else if ((nextChar & 0xf0) == 0xe0)
       length = 3;
     else if ((nextChar & 0xf1) == 0xf0)
       length = 4;
     else if ((nextChar & 0xfc) == 0xf1)
       length = 5;
     else
       length = 6;

     return length;
   }


   /*!mask will use to retrieve bits from Multibyte character  */
   UTF8_char
   maskUtf8Char (UTF8_char length)
   {
     UTF8_char mask;

     switch (length)
       {
       case 1:
 	mask = 0x7f;
 	break;
       case 2:
 	mask = 0x1f;
       case 3:
 	mask = 0x0f;
 	break;
       case 4:
 	mask = 0x07;
 	break;
       case 5:
 	mask = 0x03;
 	break;
       case 6:
 	mask = 0x01;
 	break;
       };

     return mask;
   }


   /*! This one will Convert UTF-8 buffer to UTF-16 Buffer
    * we have to give char pointer to utf-8 buffer and length of
    *that buffer , Then it will return pointer to UTF-16 Buffer*/

   UTF16_char *
   convertUtf8toUtf16 (char *p, int len)
   {
     char *s;
     s = p;
     Array < UTF16_char > *outBuffer;
     outBuffer = new Array < UTF16_char > (len);
     char mask;
     char utf8Buffer[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
     UTF32_char result;
     for (int i = 0; i < len; )
       {
 	UTF8_char length = lengthUtf8Char (p[i]);
 	mask = maskUtf8Char (length);
 	for (int j = 0; j < length; j++)
 	  utf8Buffer[j] = p[i++];

 	result = utf8Buffer[0] & mask;
 	for (int k = 1; k < length; k++)
 	  {
 	    if ((utf8Buffer[k] & 0xc0) != 0x80)
 	      {
 		throw XmlPullParserException ();
 		break;
 	      }
 	    result <<= 6;
 	    result |= (utf8Buffer[k] & 0x3f);
 	  }
 	outBuffer->append ((UTF16_char) result);
       }
     free (s);
     UTF8_char size = outBuffer->count ();
     unicodeLength = size * sizeof (unsigned short);
     UTF16_char *out = (UTF16_char *)malloc ((size+1) * sizeof (unsigned short));
     for (int l = 0; l < size; l++)
       out[l] = outBuffer->getElement (l);
     out[size] = 0x0;
     delete outBuffer;
     return out;
   }


   /*! This function use to determine, length of UTF-8 character of
    *  Given UTF-16 character  */
   int
   lengthUtf16Char (UTF32_char utf16Char)
   {
     int utf16CharLength;

     if (0x80 > utf16Char)
       utf16CharLength = 1;
     else if (0x800 > utf16Char)
       utf16CharLength = 2;
     else if (0x10000 > utf16Char)
       utf16CharLength = 3;
     else if (0x200000 > utf16Char)
       utf16CharLength = 4;
     else if (0x4000000 > utf16Char)
       utf16CharLength = 5;
     else
       utf16CharLength = 6;

     return utf16CharLength;
   }


   /*! after Giveing UTF-16 Character and its equivalent UTf-8 length
    *this function will create UTF-8 character and stores it into
    *dest[8] buffer  */

   void
   buildUtf8String (UTF32_char utf16Char, int len, UTF8_char dest[8])
   {
     UTF8_char mask;

     for (int x = 0; x < 8; x++)
       {
 	dest[x] = 0x0;
       }

     if (len == 1)
       dest[0] = utf16Char;
     else
       {
 	switch (len)
 	  {
 	  case 1:
 	    break;
 	  case 2:
 	    mask = 0xc0;
 	    break;
 	  case 3:
 	    mask = 0xe0;
 	    break;
 	  case 4:
 	    mask = 0xf0;
 	    break;
 	  case 5:
 	    mask = 0xf8;
 	    break;
 	  case 6:
 	    mask = 0xfc;
 	    break;
 	  };

 	for (int i = len - 1; i > 0; i--)
 	  {
 	    dest[i] = (utf16Char & 0x3f) | 0x80;
 	    utf16Char >>= 6;
 	  }

 	dest[0] = utf16Char | mask;
 	dest[len] = 0;
       }
   }


   /*! This function will convert UTF-16 Buffer to UTF-8 Buffer and
     returns pointer to UTF-8 buffer */

   UTF8_char *
   convertUtf16toUtf8 (char *p, int len)
   {
     char *s;
     int length;
     int bufferLength;
     s = p;
     UTF8_char dest[8];
     UTF32_char utf16Char = 0;
     UTF8_char i = 0;
     UTF8_char j = 0;
     Array < UTF8_char > *out;
     out = new Array < UTF8_char > (len);

     while (i < len)
       {
 	utf16Char = *((UTF16_char *) & s[i]);
 	i += 2;
 	length = lengthUtf16Char (utf16Char);
 	buildUtf8String (utf16Char, length, dest);
 	j = 0;
 	while (dest[j])
 	  out->append (dest[j++]);
       }
     free (s);
     j = out->count ();
     UTF8_char *utf8Buffer = (UTF8_char *)calloc (j+1, 1);
     for (int i = 0; i < j; i++)
       utf8Buffer[i] = out->getElement (i);
     utf8Buffer[j] = 0x0;
     delete out;
     return utf8Buffer;
   }


   /*! Basically this function is use to compare "xmlns" with given
    * UTF-16 string, Otherwise it will compare "xmlns" with only first
    * characters and process Namespaces as attributes,
    *In oreder to FIX that this function defined */

   char *
   convertUtf16toUtf8Comp (char *p, int len)
   {
     int x = len/2;
     char buffer[x];
     int m ;
     int n;
     for (m = 0, n = 0; n<x; m +=2,n++)
       buffer[n] = p[m];
     char* s = buffer;
     return s;
   }


   /*!Basically use for compare "xmlns" with given char buffer  */
   int
   compare (const char *s, int n)
   {
     UTF8_char *tmp;
     char *k;
     int x;
     if (unicodeState == None)
       return strncmp (start, s, n);
     else
       {
 	k = (char *)malloc (n*2);
 	memcpy (k, start, n*2);
 	tmp = convertUtf16toUtf8 (k, n*2);
 	x = strncmp ((char *)tmp, s, n);
 	free (tmp);
 	return x;
       }
   }

 };

 #endif
	#ifndef TOKEN_HPP
	#define TOKEN_HPP

	struct token_t
	{
	int type;
	char start, end;

	enum
	{
	Unknown, Name, AttValue, Prefix, CharData
	};

	void
	relocate (int offset)
	{
	start -= offset;
	end -= offset;
	}

	int
	isOfType (int type)
	{
	return this->type == type;

	}

	int
	length ()
	{
	if (end)
	return end - start + 1;
	else
	throw new XmlPullParserException ();
	}


	#ifndef UNICODE_OUT /* To Obtain UTF-8 Output */

	/*!added two toString methods, Because according to output format
	* converstion function will be vary
	* Here memcpy was used instead of strcpy
	* strcpy can't survive with UTF-16 string because , it ( UTF-16
	* string) has null characters middle of string */

	char*
	toString (unsigned int unicodeState)
	{
	int len = length ();
	char p = (char )malloc (len + 1);
	memcpy (p, start, len);
	p[len] = 0;
	unicodeLength = len;
	switch (unicodeState)
	{
	case None:
	return p;
	break;
	case LE:
	case BE:
	return (char *)convertUtf16toUtf8 (p, unicodeLength);
	break;
	};
	}
	#endif


	#ifdef UNICODE_OUT /! To Obtain UTF-16 Output /


	char*
	toString (unsigned int unicodeState)
	{
	int len = length ();
	char p = (char )malloc (len + 1);
	memcpy (p, start, len);
	p[len] = 0;
	unicodeLength = len;
	switch (unicodeState)
	{
	case None:
	return (char *)convertUtf8toUtf16 (p, unicodeLength);
	break;
	case LE:
	case BE:
	return p;
	break;
	};
	}
	#endif


	/! To Determine length of Given UTF-8 character /

	UTF8_char
	lengthUtf8Char (char nextChar)
	{
	UTF8_char length;

	if ((nextChar & 0x80) == 0x00)
	length = 1;
	else if ((nextChar & 0xe0) == 0xc0)
	length = 2;
	else if ((nextChar & 0xf0) == 0xe0)
	length = 3;
	else if ((nextChar & 0xf1) == 0xf0)
	length = 4;
	else if ((nextChar & 0xfc) == 0xf1)
	length = 5;
	else
	length = 6;

	return length;
	}


	/!mask will use to retrieve bits from Multibyte character /
	UTF8_char
	maskUtf8Char (UTF8_char length)
	{
	UTF8_char mask;

	switch (length)
	{
	case 1:
	mask = 0x7f;
	break;
	case 2:
	mask = 0x1f;
	case 3:
	mask = 0x0f;
	break;
	case 4:
	mask = 0x07;
	break;
	case 5:
	mask = 0x03;
	break;
	case 6:
	mask = 0x01;
	break;
	};

	return mask;
	}


	/*! This one will Convert UTF-8 buffer to UTF-16 Buffer
	* we have to give char pointer to utf-8 buffer and length of
	that buffer , Then it will return pointer to UTF-16 Buffer/

	UTF16_char *
	convertUtf8toUtf16 (char *p, int len)
	{
	char *s;
	s = p;
	Array < UTF16_char > *outBuffer;
	outBuffer = new Array < UTF16_char > (len);
	char mask;
	char utf8Buffer[6] = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0};
	UTF32_char result;
	for (int i = 0; i < len; )
	{
	UTF8_char length = lengthUtf8Char (p[i]);
	mask = maskUtf8Char (length);
	for (int j = 0; j < length; j++)
	utf8Buffer[j] = p[i++];

	result = utf8Buffer[0] & mask;
	for (int k = 1; k < length; k++)
	{
	if ((utf8Buffer[k] & 0xc0) != 0x80)
	{
	throw XmlPullParserException ();
	break;
	}
	result <<= 6;
	result \|= (utf8Buffer[k] & 0x3f);
	}
	outBuffer->append ((UTF16_char) result);
	}
	free (s);
	UTF8_char size = outBuffer->count ();
	unicodeLength = size * sizeof (unsigned short);
	UTF16_char out = (UTF16_char )malloc ((size+1) * sizeof (unsigned short));
	for (int l = 0; l < size; l++)
	out[l] = outBuffer->getElement (l);
	out[size] = 0x0;
	delete outBuffer;
	return out;
	}


	/*! This function use to determine, length of UTF-8 character of
	* Given UTF-16 character */
	int
	lengthUtf16Char (UTF32_char utf16Char)
	{
	int utf16CharLength;

	if (0x80 > utf16Char)
	utf16CharLength = 1;
	else if (0x800 > utf16Char)
	utf16CharLength = 2;
	else if (0x10000 > utf16Char)
	utf16CharLength = 3;
	else if (0x200000 > utf16Char)
	utf16CharLength = 4;
	else if (0x4000000 > utf16Char)
	utf16CharLength = 5;
	else
	utf16CharLength = 6;

	return utf16CharLength;
	}


	/*! after Giveing UTF-16 Character and its equivalent UTf-8 length
	*this function will create UTF-8 character and stores it into
	dest[8] buffer /

	void
	buildUtf8String (UTF32_char utf16Char, int len, UTF8_char dest[8])
	{
	UTF8_char mask;

	for (int x = 0; x < 8; x++)
	{
	dest[x] = 0x0;
	}

	if (len == 1)
	dest[0] = utf16Char;
	else
	{
	switch (len)
	{
	case 1:
	break;
	case 2:
	mask = 0xc0;
	break;
	case 3:
	mask = 0xe0;
	break;
	case 4:
	mask = 0xf0;
	break;
	case 5:
	mask = 0xf8;
	break;
	case 6:
	mask = 0xfc;
	break;
	};

	for (int i = len - 1; i > 0; i--)
	{
	dest[i] = (utf16Char & 0x3f) \| 0x80;
	utf16Char >>= 6;
	}

	dest[0] = utf16Char \| mask;
	dest[len] = 0;
	}
	}


	/*! This function will convert UTF-16 Buffer to UTF-8 Buffer and
	returns pointer to UTF-8 buffer */

	UTF8_char *
	convertUtf16toUtf8 (char *p, int len)
	{
	char *s;
	int length;
	int bufferLength;
	s = p;
	UTF8_char dest[8];
	UTF32_char utf16Char = 0;
	UTF8_char i = 0;
	UTF8_char j = 0;
	Array < UTF8_char > *out;
	out = new Array < UTF8_char > (len);

	while (i < len)
	{
	utf16Char = ((UTF16_char ) & s[i]);
	i += 2;
	length = lengthUtf16Char (utf16Char);
	buildUtf8String (utf16Char, length, dest);
	j = 0;
	while (dest[j])
	out->append (dest[j++]);
	}
	free (s);
	j = out->count ();
	UTF8_char utf8Buffer = (UTF8_char )calloc (j+1, 1);
	for (int i = 0; i < j; i++)
	utf8Buffer[i] = out->getElement (i);
	utf8Buffer[j] = 0x0;
	delete out;
	return utf8Buffer;
	}


	/*! Basically this function is use to compare "xmlns" with given
	* UTF-16 string, Otherwise it will compare "xmlns" with only first
	* characters and process Namespaces as attributes,
	In oreder to FIX that this function defined /

	char *
	convertUtf16toUtf8Comp (char *p, int len)
	{
	int x = len/2;
	char buffer[x];
	int m ;
	int n;
	for (m = 0, n = 0; n<x; m +=2,n++)
	buffer[n] = p[m];
	char* s = buffer;
	return s;
	}


	/!Basically use for compare "xmlns" with given char buffer /
	int
	compare (const char *s, int n)
	{
	UTF8_char *tmp;
	char *k;
	int x;
	if (unicodeState == None)
	return strncmp (start, s, n);
	else
	{
	k = (char )malloc (n2);
	memcpy (k, start, n*2);
	tmp = convertUtf16toUtf8 (k, n*2);
	x = strncmp ((char *)tmp, s, n);
	free (tmp);
	return x;
	}
	}

	};

	#endif