// core/sql/common/unicode_char_set.h - trafodion - Git at Google

 /**********************************************************************
 // @@@ START COPYRIGHT @@@
 //
 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
 //
 // @@@ END COPYRIGHT @@@
 **********************************************************************/
 /* -*-C++-*-
  *****************************************************************************
  *
  * File:         unicode_char_set.h
  * RCS:          $Id:
  * Description:  The implementation of unicode_char_set class
  *
  *
  * Created:      7/8/98
  * Modified:     $ $Date: 2006/11/01 01:38:09 $ (GMT)
  * Language:     C++
  * Status:       $State: Exp $
  *
  *
  *
  *
  *****************************************************************************
  */

 #ifndef _UNICODE_CHAR_SET_H
 #define _UNICODE_CHAR_SET_H

 ////////////////////////////////////////////////////////////////////
 // 4/10/98  A simple class devoted to the concept of Unicode
 ////////////////////////////////////////////////////////////////////

 #include <string.h>

 #include <stddef.h>
 #include <limits.h>

 #include "Platform.h"

 #include "NAWinNT.h"
 #include "NABoolean.h"

 // Forward declaration
 class NAWString;

 // unicode_char_set groups static helpers for NAWchar (wide) characters:
 // case-mapping entry points, accessors for a few fixed code points, and
 // simple facts about the character type.  It declares no data members.
 class unicode_char_set
 {
 public:

    // Code points handed out by space_char(), percent_char() and
    // underscore_char().
    enum CHAR_CODE_POINTS
    {
       SPACE      = 0x0020,
       PERCENT    = 0x0025,
       UNDERSCORE = 0x005F
    };

    unicode_char_set() {}

    virtual ~unicode_char_set() {}

    //
    // Case mapping.  These are declarations only; no definitions appear
    // in this header.
    //

    // Single-character upper-casing entry point.
    static NAWchar to_upper(const NAWchar);

    // Overload taking a character buffer, a size_t and an NAWString.
    // NOTE(review): presumably (input, input length, result string) --
    // confirm against the definition.
    static void to_upper(NAWchar *, size_t, NAWString &);

    // Overload taking two (buffer, Lng32) pairs and returning an Lng32.
    // NOTE(review): presumably (source, source length, target, target
    // capacity) -- confirm the argument roles and the meaning of the
    // return value against the definition.
    static Lng32 to_upper(NAWchar *, Lng32, NAWchar *, Lng32);

    // Full case mapping: a lowercase character may map to more than one
    // uppercase character.
    // NOTE(review): ownership and lifetime of the returned buffer are not
    // visible from this header -- confirm against the definition.
    static NAWchar* to_upper_full(const NAWchar);

    // Single-character lower-casing entry point.
    static NAWchar to_lower(const NAWchar);

    //
    // Fixed code points.
    //

    // Returns SPACE (0x0020).
    static NAWchar space_char()
    {
       return SPACE;
    }

    // Returns the zero-valued character.
    static NAWchar null_char()
    {
       return 0;
    }

    // Returns UNDERSCORE (0x005F).
    static NAWchar underscore_char()
    {
       return UNDERSCORE;
    }

    // Returns PERCENT (0x0025).
    static NAWchar percent_char()
    {
       return PERCENT;
    }

    //
    // Properties of the character type.
    //

    // Returns USHRT_MAX converted to NAWchar.
    static NAWchar maxCharValue()
    {
       return USHRT_MAX;
    }

    // Returns sizeof(NAWchar), narrowed to short.
    static short bytesPerChar()
    {
       return sizeof(NAWchar);
    }

    // Accepts every value strictly below 0xFFFE; 0xFFFE and above are
    // rejected.  Surrogate values are not singled out here.
    static NABoolean isValidUCS2CodePoint(NAWchar wc)
    {
       const UInt32 firstRejectedValue = 0xFFFE;
       return ( (UInt32)wc < firstRejectedValue );
    }

    /*
    *** The surrogate helpers below are kept commented out because they ***
    *** are not yet used.                                               ***

    // Determines if a NAWchar is a UTF-16 high surrogate code point,
    // ranging from 0xd800 to 0xdbff, inclusive.
    static NABoolean isHighSurrogateCodePoint(NAWchar wc)
    {
      UInt32 codePointValue = (UInt32)wc;
      return ( codePointValue >= 0xD800 && codePointValue <= 0xDBFF );
    }

    // Determines if a character is a UTF-16 low surrogate code point,
    // ranging from 0xdc00 to 0xdfff, inclusive.
    static NABoolean isLowSurrogateCodePoint(NAWchar wc)
    {
      UInt32 codePointValue = (UInt32)wc;
      return ( codePointValue >= 0xDC00 && codePointValue <= 0xDFFF );
    }

    // Determines if the specified code units form a UTF-16 surrogate pair.
    static NABoolean isUTF16SurrogatePair(NAWchar hwc, NAWchar lwc)
    {
      return ( isHighSurrogateCodePoint(hwc) && isLowSurrogateCodePoint(lwc) );
    }

    *** End of commented-out code ***
    */

 };

 //
 // UTF-8 related macro definitions and function declarations
 //

 // Byte classifiers for UTF-8 data.  Each macro masks off the leading bits
 // of x and compares them in place; x is evaluated exactly once.

 // True when the top bit is clear (0xxxxxxx).
 #define IS_7_BIT_ASCII_IN_UTF8_CHAR(x)  (((x) & 0x80) == 0x00)

 // True when the two top bits are 10 (10xxxxxx).
 #define IS_NOT_1ST_BYTE_IN_UTF8_CHAR(x) (((x) & 0xC0) == 0x80)

 // True for every byte whose two top bits are anything other than 10.
 #define IS_1ST_BYTE_IN_UTF8_CHAR(x)     (((x) & 0xC0) != 0x80)

 // Free-function helpers for UTF-8 byte strings.  Only the prototypes are
 // given here; the definitions are not part of this header.

 // Takes the first byte of a UTF-8 character.
 // NOTE(review): judging by the name, presumably returns the length in
 // bytes of the character introduced by that byte -- confirm against the
 // definition.
 Int32 UTF8CharLenInBytes(const unsigned char firstByteOfTheUtf8Char); // returns 0 on errors

 // Takes a UTF-8 byte string, its length in bytes, and a byte position.
 // NOTE(review): judging by the name, presumably returns the index of the
 // last byte of the character located at or before bytePos -- confirm
 // against the definition.
 Int32 IndexOfLastByteOfUTF8CharAtOrBeforePos (const unsigned char *utf8Str,
                                               const Int32 utf8StrLenInBytes,
                                               const Int32 bytePos); // returns -1 on errors

 // Same parameters as the function above.
 // NOTE(review): judging by the name, presumably returns the index of the
 // first byte of the character located at or before bytePos -- confirm
 // against the definition.
 Int32 IndexOfFirstByteOfUTF8CharAtOrBeforePos(const unsigned char *utf8Str,
                                               const Int32 utf8StrLenInBytes,
                                               const Int32 bytePos); // returns -1 on errors

 #endif
	/**********************************************************************
	// @@@ START COPYRIGHT @@@
	//
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.
	//
	// @@@ END COPYRIGHT @@@
	**********************************************************************/
	/* -*-C++-*-
	*****************************************************************************
	*
	* File: unicode_char_set.h
	* RCS: $Id:
	* Description: The implementation of unicode_char_set class
	*
	*
	* Created: 7/8/98
	* Modified: $ $Date: 2006/11/01 01:38:09 $ (GMT)
	* Language: C++
	* Status: $State: Exp $
	*
	*
	*
	*
	*****************************************************************************
	*/

	#ifndef _UNICODE_CHAR_SET_H
	#define _UNICODE_CHAR_SET_H

	////////////////////////////////////////////////////////////////////
	// 4/10/98 A simple class devoted to the concept of Unicode
	////////////////////////////////////////////////////////////////////

	#include <string.h>

	#include <stddef.h>
	#include <limits.h>

	#include "Platform.h"

	#include "NAWinNT.h"
	#include "NABoolean.h"

	// Forward declaration
	class NAWString;

	// Static helpers for NAWchar (wide) characters: case-mapping entry
	// points, accessors for a few fixed code points, and simple facts about
	// the character type.  The class declares no data members.
	class unicode_char_set
	{
	public:

	   // Code points returned by space_char(), percent_char() and
	   // underscore_char().
	   enum CHAR_CODE_POINTS {SPACE=0x0020, PERCENT=0x0025, UNDERSCORE=0x005F};

	   unicode_char_set() {}

	   virtual ~unicode_char_set() {}

	   // Single-character upper-casing entry point (declaration only).
	   static NAWchar to_upper(const NAWchar);

	   // Overload taking a character buffer, a size_t and an NAWString.
	   // NOTE(review): presumably (input, input length, result string) --
	   // confirm against the definition.
	   static void to_upper(NAWchar *, size_t, NAWString &);

	   // Overload taking two (buffer, Lng32) pairs and returning an Lng32.
	   // Both buffer arguments are pointers.
	   // NOTE(review): presumably (source, source length, target, target
	   // capacity) -- confirm the argument roles and the meaning of the
	   // return value against the definition.
	   static Lng32 to_upper(NAWchar *, Lng32 , NAWchar *, Lng32);

	   // For full case mapping, a lowercase character may map to
	   // more than one uppercase character.
	   // NOTE(review): ownership and lifetime of the returned buffer are
	   // not visible from this header -- confirm against the definition.
	   static NAWchar* to_upper_full(const NAWchar);

	   // Single-character lower-casing entry point (declaration only).
	   static NAWchar to_lower(const NAWchar);

	   // Returns SPACE (0x0020).
	   static NAWchar space_char() { return unicode_char_set::SPACE; }

	   // Returns the zero-valued character.
	   static NAWchar null_char() { return 0; }

	   // Returns UNDERSCORE (0x005F).
	   static NAWchar underscore_char() { return unicode_char_set::UNDERSCORE; }

	   // Returns PERCENT (0x0025).
	   static NAWchar percent_char() { return unicode_char_set::PERCENT; }

	   // Returns USHRT_MAX converted to NAWchar.
	   static NAWchar maxCharValue() { return USHRT_MAX; }

	   // Returns sizeof(NAWchar), narrowed to short.
	   static short bytesPerChar() { return sizeof(NAWchar); }

	   // Accepts every value strictly below 0xFFFE; 0xFFFE and above are
	   // rejected.  Surrogate values are not singled out here.
	   static NABoolean isValidUCS2CodePoint(NAWchar wc)
	   {
	     UInt32 codePointValue = (UInt32)wc;
	     return ( codePointValue < 0xFFFE );
	   }

	   /*
	   *** Comment out the following functions because they are not yet used. ***

	   // Determines if a NAWchar is a UTF-16 high surrogate code point,
	   // ranging from 0xd800 to 0xdbff, inclusive.
	   static NABoolean isHighSurrogateCodePoint(NAWchar wc)
	   {
	     UInt32 codePointValue = (UInt32)wc;
	     return ( codePointValue >= 0xD800 && codePointValue <= 0xDBFF );
	   }

	   // Determines if a character is a UTF-16 low surrogate code point,
	   // ranging from 0xdc00 to 0xdfff, inclusive.
	   static NABoolean isLowSurrogateCodePoint(NAWchar wc)
	   {
	     UInt32 codePointValue = (UInt32)wc;
	     return ( codePointValue >= 0xDC00 && codePointValue <= 0xDFFF );
	   }

	   // Determines if the specified code units form a UTF-16 surrogate pair.
	   static NABoolean isUTF16SurrogatePair(NAWchar hwc, NAWchar lwc)
	   {
	     return ( isHighSurrogateCodePoint(hwc) && isLowSurrogateCodePoint(lwc) );
	   }

	   *** End of commented-out code ***
	   */

	private:

	};

	//
	// UTF-8 related macro definitions and function declarations
	//

	// UTF-8 byte classifiers.  Each macro compares the masked leading bits of
	// x directly against the expected pattern; x is evaluated exactly once.

	// Top bit clear: 0xxxxxxx.
	#define IS_7_BIT_ASCII_IN_UTF8_CHAR(x)  (((x) & 0x80) == 0x00)

	// Two top bits equal to 10: 10xxxxxx.
	#define IS_NOT_1ST_BYTE_IN_UTF8_CHAR(x) (((x) & 0xC0) == 0x80)

	// Two top bits anything other than 10.
	#define IS_1ST_BYTE_IN_UTF8_CHAR(x)     (((x) & 0xC0) != 0x80)

	// Free-function helpers for UTF-8 byte strings.  Only the prototypes are
	// given here; the definitions are not part of this header.

	// Takes the first byte of a UTF-8 character.
	// NOTE(review): judging by the name, presumably returns the length in
	// bytes of the character introduced by that byte -- confirm against the
	// definition.
	Int32 UTF8CharLenInBytes(const unsigned char firstByteOfTheUtf8Char); // returns 0 on errors

	// Takes a UTF-8 byte string, its length in bytes, and a byte position.
	// NOTE(review): judging by the name, presumably returns the index of the
	// last byte of the character located at or before bytePos -- confirm
	// against the definition.
	Int32 IndexOfLastByteOfUTF8CharAtOrBeforePos (const unsigned char *utf8Str,
	const Int32 utf8StrLenInBytes,
	const Int32 bytePos); // returns -1 on errors

	// Same parameters as the function above.
	// NOTE(review): judging by the name, presumably returns the index of the
	// first byte of the character located at or before bytePos -- confirm
	// against the definition.
	Int32 IndexOfFirstByteOfUTF8CharAtOrBeforePos(const unsigned char *utf8Str,
	const Int32 utf8StrLenInBytes,
	const Int32 bytePos); // returns -1 on errors

	#endif