blob: fcb7ee9fc0f469973dad733a7f1f24622f8dcf96 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "hycomp.h"
/**
 * Decode one UTF8-encoded character from a bounded buffer.
 *
 * Reads a single 1, 2 or 3 byte sequence starting at input and stores the
 * resulting 16-bit value in result. At most bytesRemaining bytes are read.
 *
 * @param[in] input pointer to the first byte of the encoded character
 * @param[out] result receives the decoded value; left untouched on failure
 * @param[in] bytesRemaining number of bytes that may be read from input
 *
 * @return The number of bytes consumed (1, 2 or 3) on success, 0 on failure
 *
 * @note Failure is reported when bytesRemaining is too small for the
 *       sequence announced by the lead byte, when the lead byte is 0x00,
 *       when a trailing byte is not of the form 10xxxxxx, or when the lead
 *       byte does not start a 1, 2 or 3 byte sequence.
 * @note Overlong forms are not rejected: the pair 0xC0 0x80 decodes to 0.
 */
U_32
decodeUTF8CharN (const U_8 * input, U_16 * result, U_32 bytesRemaining)
{
  U_8 lead;
  U_8 second;
  U_8 third;

  if (bytesRemaining == 0)
  {
    return 0;
  }

  lead = input[0];

  if (lead == 0x00)
  {
    /* a raw NUL byte is not accepted as an encoding */
    return 0;
  }

  if ((lead & 0x80) == 0x00)
  {
    /* 0xxxxxxx: the byte is the character */
    *result = (U_16) lead;
    return 1;
  }

  if ((lead & 0xE0) == 0xC0)
  {
    /* 110xxxxx 10xxxxxx */
    if (bytesRemaining < 2)
    {
      return 0;
    }
    second = input[1];
    if ((second & 0xC0) != 0x80)
    {
      return 0;
    }
    *result = (U_16) (((lead & 0x1F) << 6) | (second & 0x3F));
    return 2;
  }

  if ((lead & 0xF0) == 0xE0)
  {
    /* 1110xxxx 10xxxxxx 10xxxxxx */
    if (bytesRemaining < 3)
    {
      return 0;
    }
    second = input[1];
    if ((second & 0xC0) != 0x80)
    {
      /* bail out before touching the third byte */
      return 0;
    }
    third = input[2];
    if ((third & 0xC0) != 0x80)
    {
      return 0;
    }
    *result = (U_16) (((lead & 0x0F) << 12)
                      | ((second & 0x3F) << 6)
                      | (third & 0x3F));
    return 3;
  }

  /* stray continuation byte, or a lead byte for a value above 0xFFFF */
  return 0;
}
/**
 * Decode one UTF8-encoded character without an explicit length limit.
 *
 * Delegates to decodeUTF8CharN with a limit of 3 bytes, so the caller must
 * make sure that every byte of the sequence announced by the lead byte is
 * readable. Decoding stops at the first trailing byte that is not a valid
 * continuation byte.
 *
 * @param[in] input pointer to the first byte of the encoded character
 * @param[out] result receives the decoded value; left untouched on failure
 *
 * @return The number of bytes consumed (1, 2 or 3) on success, 0 on failure
 */
U_32
decodeUTF8Char (const U_8 * input, U_16 * result)
{
  /* longest sequence the decoder accepts, in bytes */
  const U_32 longestSequence = 3;

  return decodeUTF8CharN (input, result, longestSequence);
}