/* modules/portlib/src/main/native/common/shared/utf8decode.c - harmony-classlib - Git at Google */

 /*
  *  Licensed to the Apache Software Foundation (ASF) under one or more
  *  contributor license agreements.  See the NOTICE file distributed with
  *  this work for additional information regarding copyright ownership.
  *  The ASF licenses this file to You under the Apache License, Version 2.0
  *  (the "License"); you may not use this file except in compliance with
  *  the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  *  Unless required by applicable law or agreed to in writing, software
  *  distributed under the License is distributed on an "AS IS" BASIS,
  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  *  See the License for the specific language governing permissions and
  *  limitations under the License.
  */

 #include "hycomp.h"

 /**
  * Decode one UTF8-encoded character from a length-limited buffer.
  *
  * Inspects the lead byte at input to select a one, two or three byte
  * encoding, validates every continuation byte (its top two bits must be 10),
  * and stores the decoded 16-bit value through result.
  *
  * @param[in] input Pointer to the first byte of the encoded character
  * @param[out] result Receives the decoded value; left untouched on failure
  * @param[in] bytesRemaining Number of bytes that may be read from input
  *
  * @return The number of input bytes consumed (1, 2 or 3) on success, 0 on failure
  *
  * @note Never reads more than bytesRemaining bytes; a sequence that needs more
  *       bytes than are available is reported as a failure.
  * @note A lone 0x00 byte is rejected as an illegal NUL encoding, and any lead
  *       byte that does not introduce a one, two or three byte encoding is
  *       rejected as well.
  * @note Overlong encodings and surrogate values are not checked for.
  */
 U_32
 decodeUTF8CharN (const U_8 * input, U_16 * result, U_32 bytesRemaining)
 {
   U_8 lead;
   U_8 second;
   U_8 third;

   if (bytesRemaining < 1)
     {
       return 0;
     }

   lead = input[0];

   if (lead == 0x00)
     {
       /* illegal NUL encoding */
       return 0;
     }

   if ((lead & 0x80) == 0x00)
     {
       /* 0xxxxxxx: the byte itself is the character */
       *result = (U_16) lead;
       return 1;
     }

   if ((lead & 0xE0) == 0xC0)
     {
       /* 110xxxxx 10xxxxxx */
       if (bytesRemaining < 2)
         {
           return 0;
         }

       second = input[1];
       if ((second & 0xC0) != 0x80)
         {
           return 0;
         }

       *result = (U_16) (((lead & 0x1F) << 6) | (second & 0x3F));
       return 2;
     }

   if ((lead & 0xF0) == 0xE0)
     {
       /* 1110xxxx 10xxxxxx 10xxxxxx */
       if (bytesRemaining < 3)
         {
           return 0;
         }

       second = input[1];
       if ((second & 0xC0) != 0x80)
         {
           return 0;
         }

       /* the third byte is only read once the second one has been validated */
       third = input[2];
       if ((third & 0xC0) != 0x80)
         {
           return 0;
         }

       *result = (U_16) (((lead & 0x0F) << 12)
                         | ((second & 0x3F) << 6)
                         | (third & 0x3F));
       return 3;
     }

   /* illegal lead byte: a stray continuation byte, or an encoding that
      would decode to a char > 0xFFFF */
   return 0;
 }

 /**
  * Decode one UTF8-encoded character without an explicit length limit.
  *
  * Forwards to decodeUTF8CharN with a budget of three bytes, which is the
  * longest encoding that function accepts.
  *
  * @param[in] input Pointer to the first byte of the encoded character
  * @param[out] result Receives the decoded value
  *
  * @return The number of input bytes consumed (1, 2 or 3) on success, 0 on failure
  */
 U_32
 decodeUTF8Char (const U_8 * input, U_16 * result)
 {
   /* a UTF8 character handled here never needs more than 3 bytes */
   const U_32 longestEncoding = 3;

   return decodeUTF8CharN (input, result, longestEncoding);
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#include "hycomp.h"

	/**
	 * Decode one UTF8-encoded character from a length-limited buffer.
	 *
	 * The lead byte determines whether the encoding is one, two or three bytes
	 * long; each following byte must be a continuation byte (top two bits 10).
	 * On success the decoded 16-bit value is stored through result.
	 *
	 * @param[in] input Pointer to the first byte of the encoded character
	 * @param[out] result Receives the decoded value; left untouched on failure
	 * @param[in] bytesRemaining Number of bytes that may be read from input
	 *
	 * @return The number of input bytes consumed (1, 2 or 3) on success, 0 on failure
	 *
	 * @note Never reads more than bytesRemaining bytes; if more bytes are required
	 *       to fully decode the character, failure is returned.
	 * @note A lone 0x00 byte is rejected as an illegal NUL encoding.
	 * @note Overlong encodings and surrogate values are not checked for.
	 */
	U_32
	decodeUTF8CharN (const U_8 * input, U_16 * result, U_32 bytesRemaining)
	{
		U_8 first;
		U_16 value;
		U_32 length;
		U_32 index;

		if (bytesRemaining < 1)
		{
			return 0;
		}

		first = input[0];
		if (first == 0x00)
		{
			/* illegal NUL encoding */
			return 0;
		}

		/* classify the lead byte: encoded length and the payload bits it carries */
		if ((first & 0x80) == 0x00)
		{
			/* one byte encoding */
			length = 1;
			value = (U_16) first;
		}
		else if ((first & 0xE0) == 0xC0)
		{
			/* two byte encoding */
			length = 2;
			value = (U_16) (first & 0x1F);
		}
		else if ((first & 0xF0) == 0xE0)
		{
			/* three byte encoding */
			length = 3;
			value = (U_16) (first & 0x0F);
		}
		else
		{
			/* illegal encoding (i.e. would decode to a char > 0xFFFF) */
			return 0;
		}

		if (bytesRemaining < length)
		{
			return 0;
		}

		/* fold in six payload bits per continuation byte, stopping at the first
		   byte that is not of the form 10xxxxxx so later bytes are never read */
		for (index = 1; index < length; index++)
		{
			U_8 next = input[index];

			if ((next & 0xC0) != 0x80)
			{
				return 0;
			}
			value = (U_16) ((value << 6) | (next & 0x3F));
		}

		*result = value;
		return length;
	}

	/**
	 * Decode one UTF8-encoded character without an explicit length limit.
	 *
	 * Delegates to decodeUTF8CharN, allowing it to read up to three bytes, which
	 * is the longest encoding that function accepts.
	 *
	 * @param[in] input Pointer to the first byte of the encoded character
	 * @param[out] result Receives the decoded value
	 *
	 * @return The number of input bytes consumed (1, 2 or 3) on success, 0 on failure
	 */
	U_32
	decodeUTF8Char (const U_8 * input, U_16 * result)
	{
		/* a UTF8 character handled here can't require more than 3 bytes */
		U_32 consumed = decodeUTF8CharN (input, result, 3);

		return consumed;
	}