src/libguac/unicode.c - guacamole-server - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *   http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing,
  * software distributed under the License is distributed on an
  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
  * KIND, either express or implied.  See the License for the
  * specific language governing permissions and limitations
  * under the License.
  */

 #include "config.h"

 #include "guacamole/unicode.h"

 #include <stddef.h>

 size_t guac_utf8_charsize(unsigned char c) {

     /* Determine size in bytes of character */
     if ((c | 0x7F) == 0x7F) return 1;
     if ((c | 0x1F) == 0xDF) return 2;
     if ((c | 0x0F) == 0xEF) return 3;
     if ((c | 0x07) == 0xF7) return 4;

     /* Default to one character */
     return 1;

 }

 size_t guac_utf8_strlen(const char* str) {

     /* The current length of the string */
     int length = 0;

     /* Number of characters before start of next character */
     int skip = 0;

     while (*str != 0) {

         /* If skipping, then skip */
         if (skip > 0) skip--;

         /* Otherwise, determine next skip value, and increment length */
         else {

             /* Get next character */
             unsigned char c = (unsigned char) *str;

             /* Determine skip value (size in bytes of rest of character) */
             skip = guac_utf8_charsize(c) - 1;

             length++;
         }

         str++;
     }

     return length;

 }

 int guac_utf8_write(int codepoint, char* utf8, int length) {

     int i;
     int mask, bytes;

     /* If not even one byte, cannot write */
     if (length <= 0)
         return 0;

     /* Determine size and initial byte mask */
     if (codepoint <= 0x007F) {
         mask  = 0x00;
         bytes = 1;
     }
     else if (codepoint <= 0x7FF) {
         mask  = 0xC0;
         bytes = 2;
     }
     else if (codepoint <= 0xFFFF) {
         mask  = 0xE0;
         bytes = 3;
     }
     else if (codepoint <= 0x1FFFFF) {
         mask  = 0xF0;
         bytes = 4;
     }

     /* Otherwise, invalid codepoint */
     else {
         *(utf8++) = '?';
         return 1;
     }

     /* If not enough room, don't write anything */
     if (bytes > length)
         return 0;

     /* Offset buffer by size */
     utf8 += bytes - 1;

     /* Add trailing bytes, if any */
     for (i=1; i<bytes; i++) {
         *(utf8--) = 0x80 | (codepoint & 0x3F);
         codepoint >>= 6;
     }

     /* Set initial byte */
     *utf8 = mask | codepoint;

     /* Done */
     return bytes;

 }

 int guac_utf8_read(const char* utf8, int length, int* codepoint) {

     unsigned char initial;
     int bytes;
     int result;
     int i;

     /* If not even one byte, cannot read */
     if (length <= 0)
         return 0;

     /* Read initial byte */
     initial = (unsigned char) *(utf8++);

     /* 0xxxxxxx */
     if ((initial | 0x7F) == 0x7F) {
         result = initial;
         bytes  = 1;
     }

     /* 110xxxxx 10xxxxxx */
     else if ((initial | 0x1F) == 0xDF) {
         result = initial & 0x1F;
         bytes  = 2;
     }

     /* 1110xxxx 10xxxxxx 10xxxxxx */
     else if ((initial | 0x0F) == 0xEF) {
         result = initial & 0x0F;
         bytes  = 3;
     }

     /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
     else if ((initial | 0x07) == 0xF7) {
         result = initial & 0x07;
         bytes  = 4;
     }

     /* Otherwise, invalid codepoint */
     else {
         *codepoint = 0xFFFD; /* Replacement character */
         return 1;
     }

     /* If not enough room, don't read anything */
     if (bytes > length)
         return 0;

     /* Read trailing bytes, if any */
     for (i=1; i<bytes; i++) {
         result <<= 6;
         result |= *(utf8++) & 0x3F;
     }

     *codepoint = result;
     return bytes;

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing,
	* software distributed under the License is distributed on an
	* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	* KIND, either express or implied. See the License for the
	* specific language governing permissions and limitations
	* under the License.
	*/

	#include "config.h"

	#include "guacamole/unicode.h"

	#include <stddef.h>

	size_t guac_utf8_charsize(unsigned char c) {

	/* Determine size in bytes of character */
	if ((c \| 0x7F) == 0x7F) return 1;
	if ((c \| 0x1F) == 0xDF) return 2;
	if ((c \| 0x0F) == 0xEF) return 3;
	if ((c \| 0x07) == 0xF7) return 4;

	/* Default to one character */
	return 1;

	}

	size_t guac_utf8_strlen(const char* str) {

	/* The current length of the string */
	int length = 0;

	/* Number of characters before start of next character */
	int skip = 0;

	while (*str != 0) {

	/* If skipping, then skip */
	if (skip > 0) skip--;

	/* Otherwise, determine next skip value, and increment length */
	else {

	/* Get next character */
	unsigned char c = (unsigned char) *str;

	/* Determine skip value (size in bytes of rest of character) */
	skip = guac_utf8_charsize(c) - 1;

	length++;
	}

	str++;
	}

	return length;

	}

	int guac_utf8_write(int codepoint, char* utf8, int length) {

	int i;
	int mask, bytes;

	/* If not even one byte, cannot write */
	if (length <= 0)
	return 0;

	/* Determine size and initial byte mask */
	if (codepoint <= 0x007F) {
	mask = 0x00;
	bytes = 1;
	}
	else if (codepoint <= 0x7FF) {
	mask = 0xC0;
	bytes = 2;
	}
	else if (codepoint <= 0xFFFF) {
	mask = 0xE0;
	bytes = 3;
	}
	else if (codepoint <= 0x1FFFFF) {
	mask = 0xF0;
	bytes = 4;
	}

	/* Otherwise, invalid codepoint */
	else {
	*(utf8++) = '?';
	return 1;
	}

	/* If not enough room, don't write anything */
	if (bytes > length)
	return 0;

	/* Offset buffer by size */
	utf8 += bytes - 1;

	/* Add trailing bytes, if any */
	for (i=1; i<bytes; i++) {
	*(utf8--) = 0x80 \| (codepoint & 0x3F);
	codepoint >>= 6;
	}

	/* Set initial byte */
	*utf8 = mask \| codepoint;

	/* Done */
	return bytes;

	}

	int guac_utf8_read(const char* utf8, int length, int* codepoint) {

	unsigned char initial;
	int bytes;
	int result;
	int i;

	/* If not even one byte, cannot read */
	if (length <= 0)
	return 0;

	/* Read initial byte */
	initial = (unsigned char) *(utf8++);

	/* 0xxxxxxx */
	if ((initial \| 0x7F) == 0x7F) {
	result = initial;
	bytes = 1;
	}

	/* 110xxxxx 10xxxxxx */
	else if ((initial \| 0x1F) == 0xDF) {
	result = initial & 0x1F;
	bytes = 2;
	}

	/* 1110xxxx 10xxxxxx 10xxxxxx */
	else if ((initial \| 0x0F) == 0xEF) {
	result = initial & 0x0F;
	bytes = 3;
	}

	/* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
	else if ((initial \| 0x07) == 0xF7) {
	result = initial & 0x07;
	bytes = 4;
	}

	/* Otherwise, invalid codepoint */
	else {
	codepoint = 0xFFFD; / Replacement character */
	return 1;
	}

	/* If not enough room, don't read anything */
	if (bytes > length)
	return 0;

	/* Read trailing bytes, if any */
	for (i=1; i<bytes; i++) {
	result <<= 6;
	result \|= *(utf8++) & 0x3F;
	}

	*codepoint = result;
	return bytes;

	}