// core/sql/common/charinfo.h - trafodion - Git at Google

 /**********************************************************************
 // @@@ START COPYRIGHT @@@
 //
 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
 //
 // @@@ END COPYRIGHT @@@
 **********************************************************************/
 #ifndef CHARINFO_H
 #define CHARINFO_H
 /* -*-C++-*-
  *****************************************************************************
  *
  * File:         CharInfo.h
  * Description:  The header file for the CharInfo class. This class defines
  *   	 	 and provides features/information about character sets,
  *		 collations and coercibility that are supported by SQL/MX.
  *
  *
  * Created:      7/8/98
  * Modified:     $Date: 2007/10/09 19:38:37 $ (GMT)
  * Language:     C++
  * Status:       $State: Exp $
  *
  *
  *
  *****************************************************************************
  */


 #include "Platform.h"

 #include "Collections.h"	// for LIST in CollationDB
 #include "NABoolean.h"
 #include "Platform.h"
 #include "NAWinNT.h"
 #include "ComCharSetDefs.h"
 #include "sql_charset_strings.h"

 // Forward references
 class ComMPLoc;
 class QualifiedName;
 class SchemaName;

 // Contents of this file
 class CharInfo;
 class CollationInfo;
 class CollationDB;


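 // NOTE: the discussion below predates the current enum. CharInfo::SQL_TEXT
 // is now declared as a synonym for UNICODE (see enum CharSet below), not
 // for ISO88591.
 // Historically SQL_TEXT was equated with ISO88591;
 // this was used by StaticCompiler.cpp, in the MODULE statement.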
 //
 //   ***HOWEVER:***							##
 //   Ansi 4.2 specifies that SQL_TEXT has to be the union of all char-sets
 //   implemented by an implementation. In our case, it should be UNICODE.
 //   So I am not sure if we should disallow 'MODULE...NAMES ARE SQL_TEXT'
 //   and instead promote '...NAMES ARE ISO88591' instead.
 //
 //   Taking a UNICODE module can be done, except that we can not
 //   effectively spit out the error msgs as we do not know the locale
 //   into which error msg is converted
 //   (NT does not have a font that would cover all UNICODE characters!).
 //
 //   Disallowing 'MODULE...NAMES ARE SQL_TEXT' anytime soon would cause
 //   disruption to our Beta sites.
 //   I propose that, whenever we do fully support a UNICODE module file,
 //   then we switch the CharInfo enum SQL_TEXT to equal UNICODE instead
 //   of ISO88591 (and deal with any disruption then...).


 // Remove any pre-existing UNICODE / SQL_TEXT macros so that they cannot
 // clash with the CharInfo::CharSet enumerators of the same names.
 #undef UNICODE
 // conflict with definition in file sqlmxevents/zmxc
 #undef SQL_TEXT

 #undef  MAX_CHAR_SET_STRING_LENGTH
 #define MAX_CHAR_SET_STRING_LENGTH 128	// keep in sync with w:/cli/sqlcli.h
 					// and with w:/common/ComSizeDefs.h
 					//     ComMAX_ANSI_IDENTIFIER_INTERNAL_LEN_IN_NAWCHARS

 #define MAXNPASSES 4                    // Maximum number of Passes for any collation

 // Per-system-collation parameter tables. Both arrays are indexed by
 // CollationInfo::getCollationParamsIndex(), i.e.
 // (collation - CharInfo::FIRST_SYS_COLLATION - 1), so entry 0 belongs to
 // CZECH_COLLATION (101) and entry 1 to CZECH_COLLATION_CI (102).
 // A new system collation needs a new entry in BOTH tables.

 // Value returned by CollationInfo::getCollationNPasses().
 static const short collationNPasses[] = {2,2};
 // Value returned by CollationInfo::getCollationMaxChar().
 // NOTE(review): the exact meaning of 0x81 is not visible in this header.
 static const unsigned char collationMaxChar[] = {0x81,0x81};


 class CharInfo
 {
 public:

    enum CharSet		// keep in sync with w:/cli/sqlcli.h enum SQLCHARSET_CODE !
    {    		// and with the charset map table in CharInfo.cpp  !
      CHARSET_MIN    = -2,
      KSC5601_MP	    = -2,  // an MX Unicode encoding could be named KSX5601.
      KANJI_MP	    = -1,  // logically equiv to SJIS, physically diff encoding.
      // the defines below are defined in common/ComCharSetDefs.h, so
      // that they can be used by other components (ODBC, utilities?)
      // without sourcing in this file
      UnknownCharSet =  SQLCHARSETCODE_UNKNOWN, //  0
      ISO88591 = SQLCHARSETCODE_ISO88591,   //  1
      ISO88592 = 2,
      ISO88593 = 3,
      ISO88594 = 4,
      ISO88595 = 5,
      ISO88596 = 6,
      ISO88597 = 7,
      ISO88598 = 8,
      ISO88599 = 9,
      SJIS     = SQLCHARSETCODE_SJIS,       // 10
      UNICODE  = SQLCHARSETCODE_UCS2,       // 11
      EUCJP    = SQLCHARSETCODE_EUCJP,      // 12
      BIG5     = SQLCHARSETCODE_BIG5,       // 13
      GB18030  = SQLCHARSETCODE_GB18030,    // 14
      UTF8     = SQLCHARSETCODE_UTF8,       // 15
      KSC5601  = SQLCHARSETCODE_MB_KSC5601, // 16
      GB2312   = SQLCHARSETCODE_GB2312,     // 17
      GBK      = SQLCHARSETCODE_GBK,        // 18
      //
      // synonyms
      SQL_TEXT	    = UNICODE,
      UCS2	    = UNICODE,
      DefaultCharSet = ISO88591,

      CHARSET_MAX    = 18,

      // for internal use only
      ISO_MAPPING_CODE = SQLCHARSETCODE_ISO_MAPPING // 9999
    };

    enum Collation    { UNKNOWN_COLLATION    = 0,
 		       DefaultCollation	    = 1,
 		       SJIS_COLLATION	    = SJIS,  // to prevent coding bugs!// SJIIS= 10
 		       FIRST_SYS_COLLATION  =100,   //system collations start at 101 to Last_sys_collation
 		       CZECH_COLLATION      =101,
                        CZECH_COLLATION_CI   =102,
 		       LAST_SYS_COLLATION   ,
 		       FIRST_USER_DEFINED_COLLATION = 1000
 		     };

    enum Coercibility { NO_COLLATING_SEQUENCE = 0, COERCIBLE, IMPLICIT, EXPLICIT
 		     };

    static CharSet  	getCharSetEnum(const char* name);
    static const char*	getCharSetName(CharSet cs,
    				       NABoolean retUnkAsBlank = FALSE);
    static NABoolean	isCharSetSupported(CharSet cs);
    static NABoolean	isCharSetSupported(const char* name)
    			{ return isCharSetSupported(getCharSetEnum(name)); }

    static NABoolean	isCharSetFullySupported(CharSet cs);
    static NABoolean	isCharSetFullySupported(const char* name)
    			{ return isCharSetFullySupported(getCharSetEnum(name)); }

    static NABoolean	isOnlySingleByteCharacters(CharSet cs);
    static NABoolean	isOnlySingleByteCharacters(const char* name)
    			{ return isOnlySingleByteCharacters(getCharSetEnum(name)); }

    static NABoolean     isModuleCharSetSupported(CharSet cs)
                         { return cs == CharInfo::ISO88591; };

    static NABoolean     isHexFormatSupported(CharSet cs);
    static NABoolean     isHexFormatSupported(const char* name)
 			{ return isHexFormatSupported(getCharSetEnum(name)); }

    static NABoolean	isTerminalCharSetSupported(CharSet cs);
    static NABoolean	isTerminalCharSetSupported(const char* name)
 			{ return isTerminalCharSetSupported(getCharSetEnum(name)); }

    static NABoolean     isMsgCharSetSupported(CharSet cs);
    static NABoolean	isMsgCharSetSupported(const char* name)
 			{ return isMsgCharSetSupported(getCharSetEnum(name)); }

    static void		toggleCharSetSupport(CharSet cs); // debugging only
    static Int32	        minBytesPerChar(CharSet cs);
    static Int32	        maxBytesPerChar(CharSet cs);
    static Int32	        bytesPerChar(CharSet cs)
                         { return maxBytesPerChar(cs); }

    static NABoolean	isSingleByteCharSet(CharSet cs)
 			{ return maxBytesPerChar(cs) == 1
 			    || cs == CharInfo::UTF8 // is variable-length/width multi-byte char-set but treat it as a C/C++ string
 			    ; }
    static NABoolean     isVariableWidthMultiByteCharSet(CharSet cs);

    static NABoolean	is_NCHAR_MP(CharSet cs)
 			{ return cs == KANJI_MP || cs == KSC5601_MP; }

    static Int32         getFSTypeFixedChar(CharSet cs);
    static Int32         getFSTypeVarChar(CharSet cs);
    static Int32         getFSTypeANSIChar(CharSet cs);

    static const char*   getReplacementCharacter(CharSet cs); // replacement for untranslatable chars

    // get the encoding charset for a (logical/SQL) charset
    static CharInfo::CharSet getEncoding(const CharInfo::CharSet);


    static Collation	getCollationEnum(const char *name,
 					 NABoolean formatNSK = FALSE,
    					 size_t maxlen = 0);
    static const char*	getCollationName(Collation co,
    					 NABoolean retUnkAsBlank = FALSE);
    static Int32	        getCollationFlags(Collation co);
    static NABoolean	isCollationUserDefined(Collation co)
    { return co >= FIRST_USER_DEFINED_COLLATION; }	// watch out for UNKNOWN_COLLATION!


    static const char*	getCoercibilityText(Coercibility ce);
    static NABoolean	compareCoercibility(Coercibility ce1, Coercibility ce2);

    // check if the code point value for each character in the input string is
    // a valid UCS2 character
    static NABoolean checkCodePoint(const NAWchar *inputStr, Int32 inputLen, CharInfo::CharSet cs);

    // Convert the int value returned by MBCS_DEFAULTCHARSET_()
    // to MX enum value.  See Guardian Procedure Calls Ref Manual.
    static CharSet  	getCharSetEnumFromNSK_MBCS(Int32 n)
    {
      switch (n) {
        case 1:	return KANJI_MP;
        case 12:	return KSC5601_MP;
        default:	return UnknownCharSet;	// we don't support other magic numbers
      }					// like Hangul, Big5, Chinese PC
    }

    // Obtain the character set locale information about locale machine.
    // The result is one of the SQLCHARSETCODE_xxx values defined in SQLCLI.h,
    // or "UNKNOWN" for a unknown locale.
    // Used by the sql_id and Formatter classes.
    static Lng32		findLocaleCharSet();
    static const char*	getLocaleCharSetAsString();
    static Int32		getTargetCharTypeFromLocale();

    // check whether the client character set (e.g., the cs of a hostvar) is
    // assignment compatible with the MX one (e.g., the cs of a column).
    static NABoolean isAssignmentCompatible(CharSet clientCS, CharSet mxCS)
    {
      return clientCS == mxCS ||
             (clientCS == CharInfo::UNICODE && // relaxation
              mxCS == CharInfo::ISO88591);
    };

   // for an arbitrary string encoded in "sourceCS", with length
   // "sourceLenInBytes", what is the max. length in bytes of this
   // string after converting it to "targetCS"?
   static Int32 getMaxConvertedLenInBytes(CharSet sourceCS,
                                          Int32   sourceLenInBytes,
                                          CharSet targetCS);

   static const CollationDB *builtinCollationDB();


 private:
 friend class CollationDB;			// needs to access builtinCDB_

    static const char*	const localeCharSet_;
    static const CollationDB   *builtinCollationDB_;

 }; // CharInfo

 // Plain pairing of a collation with a coercibility value.
 // For the convenience of SqlParser.y, and ItemExpr::bindNode()
 struct CollationAndCoercibility {
   CharInfo::Collation           collation_;      // the collation
   CharInfo::Coercibility        coercibility_;   // the coercibility that goes with it
 };


 // CollationInfo: one collation entry -- its CharInfo::Collation value, its
 // name (with optional synonym offsets, see the private section) and its
 // CollationFlags. Also hosts static helpers for the system collations.
 class CollationInfo : public NABasicObject
 {
 friend class CollationDB;  // for robust security/validity: see private methods!

 public:
   enum CollationFlags {
     // external (governing SQL syntax and what MX supports) flags
     NO_FLAGS			= 0,
     NO_PAD			= 0x1,		// Ansi NO PAD (vs. PAD SPACE)
     ORDERED_CMP_ILLEGAL		= 0x10,		   // MP collations in MX-NSK-R1
     EQ_NE_CMP_ILLEGAL		= 0x20,		   // MP collations in MX-NSK-R1
     ALL_CMP_ILLEGAL		= EQ_NE_CMP_ILLEGAL | ORDERED_CMP_ILLEGAL,
     ALL_NEGATIVE_SYNTAX_FLAGS	= 0x0FFFFFFF,

     // internal (class-implementation) flags
     NO_ALLOC_AND_COPY_IN_CTOR	= 0x10000000,

     ALL_NEGATIVE_PLUS_STATIC	= ALL_NEGATIVE_SYNTAX_FLAGS | NO_ALLOC_AND_COPY_IN_CTOR
   };

   // Sizes of the arrays used to describe a multi-part collation name.
   enum CollationMisc { MAX_NAME_PARTS   = 4,
 		       SIZEARRAY_SIZE   = MAX_NAME_PARTS + 1,
 		       OFFSETARRAY_SIZE = MAX_NAME_PARTS - 1
 		     };

   // Pass numbers; there are MAXNPASSES (4) of them.
   enum Pass
   {
     FirstPass	=0,
     SecondPass	=1,
     ThirdPass	=2,
     FourthPass   =3
   };

   enum CollationType
   {

     Sort	=0, // Used for sort. when the collation type is Sort, the encoded
 		    // value of a nullable column is prepended with 2 bytes
 		    // indicating whether the value is null or not null

     Compare	=1, // Used to compare two character strings. The main difference
 		    // with the sort type is that the encoded value of a null value
 		    // is actually a null value

     Search	=2  // used to do string search
   };

   enum SortDirection
   {
     DefaultDir	=0,
     Ascending	=1,
     Descending	=2
   };


   // Defined out of line. sizArray, when given, is an array[SIZEARRAY_SIZE].
   CollationInfo(CollHeap *h, CharInfo::Collation co, const char *name,
 		CollationFlags flags = NO_FLAGS,
 		size_t *sizArray = NULL);	  /* array[SIZEARRAY_SIZE] */

   // Releases name_ unless the object carries NO_ALLOC_AND_COPY_IN_CTOR.
   ~CollationInfo()
   {
     // The flag has to be tested with bitwise AND. The previous test OR-ed the
     // flag into flags_, which is always non-zero, so the delete could never
     // run and name_ was never released.
     // NOTE(review): presumably the constructor (defined elsewhere) makes a
     // private copy of the name when the flag is clear -- confirm that its
     // allocation form matches this scalar delete.
     if (!(flags_ & NO_ALLOC_AND_COPY_IN_CTOR)) delete (char*)name_;
   }

   CharInfo::Collation	getCollationEnum() const	{ return co_; }
   const char *		getCollationName() const	{ return name_; }
   Int32			getCollationFlags() const	{ return flags_; }

   // for debugging
   void			display() const;

   // True for values strictly between FIRST_SYS_COLLATION and
   // LAST_SYS_COLLATION (both bounds excluded).
   static NABoolean isSystemCollation(const CharInfo::Collation collation)
   {
     return (collation > CharInfo::FIRST_SYS_COLLATION && collation < CharInfo::LAST_SYS_COLLATION);
   }

   // Zero-based index of a system collation into the collationNPasses /
   // collationMaxChar tables: the first system collation
   // (FIRST_SYS_COLLATION + 1) maps to 0.
   static short  getCollationParamsIndex(const CharInfo::Collation collation)
   {
     return (collation - CharInfo::FIRST_SYS_COLLATION -1);
   }

   // Table lookups. The index is NOT range-checked, so callers must only
   // pass a collation for which isSystemCollation() is true.
   inline static short  getCollationNPasses(const CharInfo::Collation collation)
   {
     return collationNPasses[getCollationParamsIndex( collation) ];
   }

   inline static unsigned char  getCollationMaxChar(const CharInfo::Collation collation)
   {
     return collationMaxChar[getCollationParamsIndex( collation) ];
   }


 private:

   // Set / clear the given bits in flags_.
   void setFlags(CollationFlags f)			{ flags_ |= f; }
   void clrFlags(CollationFlags f)			{ flags_ &= ~f; }

   // See ComMPLoc::getMPName(size_t *)
   // and QualifiedName::getQualifiedNameAsAnsiString(size_t *)
   // -- an MP name can have up to 4 parts (MAX_NAME_PARTS),
   // and an Ansi name up to 3.
   // Hence, here we have:
   // - one fixed string for the whole name,
   // - the length of the whole,
   //   and, where the caller (CollationDB::insert methods)
   //   has determined that valid synonyms exist based on current defaults,
   // - offsets into the string pointing to those synonyms
   //   (an offset value of zero means no synonym!).
   //
   // E.g., for name "\X.$Y.Z.W", if default MPLoc was "\X.$Y.DIFFSV",
   //		     012345678
   // here the namelen would of course be 9,
   // and the three offsets would be 3, 6, and 0.
   // Put another way, the string starting at name_[3] is "$Y.Z.W",
   // the string at name_[6] is "Z.W",
   // both of which are equivalent to the full name, GIVEN THE DEFAULTS.
   // The string at name_[8] is "W", which by the given defaults
   // is equivalent to the different full name "\X.$Y.DIFFSV.W";
   // hence the third offset is 0 in this example.
   //
   CharInfo::Collation	co_;
   Int32			flags_;
   const char*		name_;
   size_t		namelen_;
   size_t		synonymOffset_[OFFSETARRAY_SIZE];

 }; // CollationInfo


 typedef LIST(const CollationInfo *) CollationDBSupertype;
 typedef LIST(CollationDB *)   CollationDBList;

 class CollationDB : private CollationDBSupertype
 {
 public:

   CollationDB(CollHeap *h);

   CollationDB(CollHeap *h, const CollationInfo *co, size_t count);

   ~CollationDB();

   void clearAndReset()
   {
     for (CollIndex i = entries(); i--; ) {
       delete at(i);
       at(i) = NULL;	// (just in case)
     }
     clear();
   }

   // for debugging only
   void			display() const;
   static void		Display();

   NABoolean &refreshNeeded()	{ return refreshNeeded_; }

   // The insert methods assume you've already done an unsuccessful lookup.
   //
   // They return a newly generated CharInfo::Collation value for the
   // user-defined collation just inserted.

   CharInfo::Collation insert(ComMPLoc &loc,
 			     const ComMPLoc *defaultMPLoc,
 			     CollationInfo::CollationFlags
 			     		    flags=CollationInfo::NO_FLAGS);

   CharInfo::Collation insert(QualifiedName &qn,
 			     const SchemaName *defaultSchema,
 			     CollationInfo::CollationFlags
 					    flags=CollationInfo::NO_FLAGS);

 private:
 friend class CharInfo;	// its static funx should be the only callers of these:

   CharInfo::Collation insert(const char *nam,
 			     size_t *sizArray,	    /* array[SIZEARRAY_SIZE] */
 			     CollationInfo::CollationFlags flags,
 			     Int32 defaultMatchCount);
   inline
   CollationDB *		nextCDB() const;

   const CollationInfo *	getCollationInfo(CharInfo::Collation co) const;

   CharInfo::Collation	getCollationEnum(const char *name,
 					 NABoolean formatNSK,
   					 size_t namlen) const;
   const char *		getCollationName(CharInfo::Collation co,
   					 NABoolean retUnkAsBlank) const;

   Int32			getCollationFlags(CharInfo::Collation co) const;

   // data members
   CollHeap *			heap_;
   NABoolean			refreshNeeded_;

   static Lng32			nextUserCo_;
 }; // CollationDB


 #endif //CHARINFO_H
	/**********************************************************************
	// @@@ START COPYRIGHT @@@
	//
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.
	//
	// @@@ END COPYRIGHT @@@
	**********************************************************************/
	#ifndef CHARINFO_H
	#define CHARINFO_H
	/* -*-C++-*-
	*****************************************************************************
	*
	* File: CharInfo.h
	* Description: The header file for the CharInfo class. This class defines
	* and provides features/information about character sets,
	* collations and coercibility that are supported by SQL/MX.
	*
	*
	* Created: 7/8/98
	* Modified: $Date: 2007/10/09 19:38:37 $ (GMT)
	* Language: C++
	* Status: $State: Exp $
	*
	*
	*
	*****************************************************************************
	*/


	#include "Platform.h"

	#include "Collections.h" // for LIST in CollationDB
	#include "NABoolean.h"
	#include "Platform.h"
	#include "NAWinNT.h"
	#include "ComCharSetDefs.h"
	#include "sql_charset_strings.h"

	// Forward references
	class ComMPLoc;
	class QualifiedName;
	class SchemaName;

	// Contents of this file
	class CharInfo;
	class CollationInfo;
	class CollationDB;


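	// NOTE: the discussion below predates the current enum. CharInfo::SQL_TEXT
	// is now declared as a synonym for UNICODE (see enum CharSet below), not
	// for ISO88591.
	// Historically SQL_TEXT was equated with ISO88591;
	// this was used by StaticCompiler.cpp, in the MODULE statement.
	//
	//   ***HOWEVER:***							##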
	// Ansi 4.2 specifies that SQL_TEXT has to be the union of all char-sets
	// implemented by an implementation. In our case, it should be UNICODE.
	// So I am not sure if we should disallow 'MODULE...NAMES ARE SQL_TEXT'
	// and instead promote '...NAMES ARE ISO88591' instead.
	//
	// Taking a UNICODE module can be done, except that we can not
	// effectively spit out the error msgs as we do not know the locale
	// into which error msg is converted
	// (NT does not have a font that would cover all UNICODE characters!).
	//
	// Disallowing 'MODULE...NAMES ARE SQL_TEXT' anytime soon would cause
	// disruption to our Beta sites.
	// I propose that, whenever we do fully support a UNICODE module file,
	// then we switch the CharInfo enum SQL_TEXT to equal UNICODE instead
	// of ISO88591 (and deal with any disruption then...).


	// Remove any pre-existing UNICODE / SQL_TEXT macros so that they cannot
	// clash with the CharInfo::CharSet enumerators of the same names.
	#undef UNICODE
	// conflict with definition in file sqlmxevents/zmxc
	#undef SQL_TEXT

	#undef MAX_CHAR_SET_STRING_LENGTH
	#define MAX_CHAR_SET_STRING_LENGTH 128 // keep in sync with w:/cli/sqlcli.h
	// and with w:/common/ComSizeDefs.h
	// ComMAX_ANSI_IDENTIFIER_INTERNAL_LEN_IN_NAWCHARS

	#define MAXNPASSES 4 // Maximum number of Passes for any collation

	// Per-system-collation parameter tables. Both arrays are indexed by
	// CollationInfo::getCollationParamsIndex(), i.e.
	// (collation - CharInfo::FIRST_SYS_COLLATION - 1), so entry 0 belongs to
	// CZECH_COLLATION (101) and entry 1 to CZECH_COLLATION_CI (102).
	// A new system collation needs a new entry in BOTH tables.

	// Value returned by CollationInfo::getCollationNPasses().
	static const short collationNPasses[] = {2,2};
	// Value returned by CollationInfo::getCollationMaxChar().
	// NOTE(review): the exact meaning of 0x81 is not visible in this header.
	static const unsigned char collationMaxChar[] = {0x81,0x81};


	class CharInfo
	{
	public:

	enum CharSet // keep in sync with w:/cli/sqlcli.h enum SQLCHARSET_CODE !
	{ // and with the charset map table in CharInfo.cpp !
	CHARSET_MIN = -2,
	KSC5601_MP = -2, // an MX Unicode encoding could be named KSX5601.
	KANJI_MP = -1, // logically equiv to SJIS, physically diff encoding.
	// the defines below are defined in common/ComCharSetDefs.h, so
	// that they can be used by other components (ODBC, utilities?)
	// without sourcing in this file
	UnknownCharSet = SQLCHARSETCODE_UNKNOWN, // 0
	ISO88591 = SQLCHARSETCODE_ISO88591, // 1
	ISO88592 = 2,
	ISO88593 = 3,
	ISO88594 = 4,
	ISO88595 = 5,
	ISO88596 = 6,
	ISO88597 = 7,
	ISO88598 = 8,
	ISO88599 = 9,
	SJIS = SQLCHARSETCODE_SJIS, // 10
	UNICODE = SQLCHARSETCODE_UCS2, // 11
	EUCJP = SQLCHARSETCODE_EUCJP, // 12
	BIG5 = SQLCHARSETCODE_BIG5, // 13
	GB18030 = SQLCHARSETCODE_GB18030, // 14
	UTF8 = SQLCHARSETCODE_UTF8, // 15
	KSC5601 = SQLCHARSETCODE_MB_KSC5601, // 16
	GB2312 = SQLCHARSETCODE_GB2312, // 17
	GBK = SQLCHARSETCODE_GBK, // 18
	//
	// synonyms
	SQL_TEXT = UNICODE,
	UCS2 = UNICODE,
	DefaultCharSet = ISO88591,

	CHARSET_MAX = 18,

	// for internal use only
	ISO_MAPPING_CODE = SQLCHARSETCODE_ISO_MAPPING // 9999
	};

	enum Collation { UNKNOWN_COLLATION = 0,
	DefaultCollation = 1,
	SJIS_COLLATION = SJIS, // to prevent coding bugs!// SJIIS= 10
	FIRST_SYS_COLLATION =100, //system collations start at 101 to Last_sys_collation
	CZECH_COLLATION =101,
	CZECH_COLLATION_CI =102,
	LAST_SYS_COLLATION ,
	FIRST_USER_DEFINED_COLLATION = 1000
	};

	enum Coercibility { NO_COLLATING_SEQUENCE = 0, COERCIBLE, IMPLICIT, EXPLICIT
	};

	static CharSet getCharSetEnum(const char* name);
	static const char* getCharSetName(CharSet cs,
	NABoolean retUnkAsBlank = FALSE);
	static NABoolean isCharSetSupported(CharSet cs);
	static NABoolean isCharSetSupported(const char* name)
	{ return isCharSetSupported(getCharSetEnum(name)); }

	static NABoolean isCharSetFullySupported(CharSet cs);
	static NABoolean isCharSetFullySupported(const char* name)
	{ return isCharSetFullySupported(getCharSetEnum(name)); }

	static NABoolean isOnlySingleByteCharacters(CharSet cs);
	static NABoolean isOnlySingleByteCharacters(const char* name)
	{ return isOnlySingleByteCharacters(getCharSetEnum(name)); }

	static NABoolean isModuleCharSetSupported(CharSet cs)
	{ return cs == CharInfo::ISO88591; };

	static NABoolean isHexFormatSupported(CharSet cs);
	static NABoolean isHexFormatSupported(const char* name)
	{ return isHexFormatSupported(getCharSetEnum(name)); }

	static NABoolean isTerminalCharSetSupported(CharSet cs);
	static NABoolean isTerminalCharSetSupported(const char* name)
	{ return isTerminalCharSetSupported(getCharSetEnum(name)); }

	static NABoolean isMsgCharSetSupported(CharSet cs);
	static NABoolean isMsgCharSetSupported(const char* name)
	{ return isMsgCharSetSupported(getCharSetEnum(name)); }

	static void toggleCharSetSupport(CharSet cs); // debugging only
	static Int32 minBytesPerChar(CharSet cs);
	static Int32 maxBytesPerChar(CharSet cs);
	static Int32 bytesPerChar(CharSet cs)
	{ return maxBytesPerChar(cs); }

	static NABoolean isSingleByteCharSet(CharSet cs)
	{ return maxBytesPerChar(cs) == 1
	\|\| cs == CharInfo::UTF8 // is variable-length/width multi-byte char-set but treat it as a C/C++ string
	; }
	static NABoolean isVariableWidthMultiByteCharSet(CharSet cs);

	static NABoolean is_NCHAR_MP(CharSet cs)
	{ return cs == KANJI_MP \|\| cs == KSC5601_MP; }

	static Int32 getFSTypeFixedChar(CharSet cs);
	static Int32 getFSTypeVarChar(CharSet cs);
	static Int32 getFSTypeANSIChar(CharSet cs);

	static const char* getReplacementCharacter(CharSet cs); // replacement for untranslatable chars

	// get the encoding charset for a (logical/SQL) charset
	static CharInfo::CharSet getEncoding(const CharInfo::CharSet);


	static Collation getCollationEnum(const char *name,
	NABoolean formatNSK = FALSE,
	size_t maxlen = 0);
	static const char* getCollationName(Collation co,
	NABoolean retUnkAsBlank = FALSE);
	static Int32 getCollationFlags(Collation co);
	static NABoolean isCollationUserDefined(Collation co)
	{ return co >= FIRST_USER_DEFINED_COLLATION; } // watch out for UNKNOWN_COLLATION!


	static const char* getCoercibilityText(Coercibility ce);
	static NABoolean compareCoercibility(Coercibility ce1, Coercibility ce2);

	// check if the code point value for each character in the input string is
	// a valid UCS2 character
	static NABoolean checkCodePoint(const NAWchar *inputStr, Int32 inputLen, CharInfo::CharSet cs);

	// Convert the int value returned by MBCS_DEFAULTCHARSET_()
	// to MX enum value. See Guardian Procedure Calls Ref Manual.
	static CharSet getCharSetEnumFromNSK_MBCS(Int32 n)
	{
	switch (n) {
	case 1: return KANJI_MP;
	case 12: return KSC5601_MP;
	default: return UnknownCharSet; // we don't support other magic numbers
	} // like Hangul, Big5, Chinese PC
	}

	// Obtain the character set locale information about locale machine.
	// The result is one of the SQLCHARSETCODE_xxx values defined in SQLCLI.h,
	// or "UNKNOWN" for a unknown locale.
	// Used by the sql_id and Formatter classes.
	static Lng32 findLocaleCharSet();
	static const char* getLocaleCharSetAsString();
	static Int32 getTargetCharTypeFromLocale();

	// check whether the client character set (e.g., the cs of a hostvar) is
	// assignment compatible with the MX one (e.g., the cs of a column).
	static NABoolean isAssignmentCompatible(CharSet clientCS, CharSet mxCS)
	{
	return clientCS == mxCS \|\|
	(clientCS == CharInfo::UNICODE && // relaxation
	mxCS == CharInfo::ISO88591);
	};

	// for an arbitrary string encoded in "sourceCS", with length
	// "sourceLenInBytes", what is the max. length in bytes of this
	// string after converting it to "targetCS"?
	static Int32 getMaxConvertedLenInBytes(CharSet sourceCS,
	Int32 sourceLenInBytes,
	CharSet targetCS);

	static const CollationDB *builtinCollationDB();


	private:
	friend class CollationDB; // needs to access builtinCDB_

	static const char* const localeCharSet_;
	static const CollationDB *builtinCollationDB_;

	}; // CharInfo

	// Plain pairing of a collation with a coercibility value.
	// For the convenience of SqlParser.y, and ItemExpr::bindNode()
	struct CollationAndCoercibility {
	CharInfo::Collation collation_;        // the collation
	CharInfo::Coercibility coercibility_;  // the coercibility that goes with it
	};


	class CollationInfo : public NABasicObject
	{
	friend class CollationDB; // for robust security/validity: see private methods!

	public:
	enum CollationFlags {
	// external (governing SQL syntax and what MX supports) flags
	NO_FLAGS = 0,
	NO_PAD = 0x1, // Ansi NO PAD (vs. PAD SPACE)
	ORDERED_CMP_ILLEGAL = 0x10, // MP collations in MX-NSK-R1
	EQ_NE_CMP_ILLEGAL = 0x20, // MP collations in MX-NSK-R1
	ALL_CMP_ILLEGAL = EQ_NE_CMP_ILLEGAL \| ORDERED_CMP_ILLEGAL,
	ALL_NEGATIVE_SYNTAX_FLAGS = 0x0FFFFFFF,

	// internal (class-implementation) flags
	NO_ALLOC_AND_COPY_IN_CTOR = 0x10000000,

	ALL_NEGATIVE_PLUS_STATIC = ALL_NEGATIVE_SYNTAX_FLAGS \| NO_ALLOC_AND_COPY_IN_CTOR
	};
	enum CollationMisc { MAX_NAME_PARTS = 4,
	SIZEARRAY_SIZE = MAX_NAME_PARTS + 1,
	OFFSETARRAY_SIZE = MAX_NAME_PARTS - 1
	};

	enum Pass
	{
	FirstPass =0,
	SecondPass =1,
	ThirdPass =2,
	FourthPass =3
	};

	enum CollationType
	{

	Sort =0, // Used for sort. when the collation type is Sort, the encoded
	// value of a nullable column is prepended with 2 bytes
	// indicating whether the value is null or not null

	Compare =1, // Used to compare two character strings. The main diffrence
	// with the sort type is that the encoded value of a null value
	// is actually a null value

	Search =2 // used to do string search
	};

	enum SortDirection
	{
	DefaultDir =0,
	Ascending =1,
	Descending =2
	};


	CollationInfo(CollHeap h, CharInfo::Collation co, const char name,
	CollationFlags flags = NO_FLAGS,
	size_t sizArray = NULL); / array[SIZEARRAY_SIZE] */

	~CollationInfo()
	{
	if (!(flags_ \| NO_ALLOC_AND_COPY_IN_CTOR)) delete (char*)name_;
	}

	CharInfo::Collation getCollationEnum() const { return co_; }
	const char * getCollationName() const { return name_; }
	Int32 getCollationFlags() const { return flags_; }

	// for debugging
	void display() const;

	static NABoolean isSystemCollation(const CharInfo::Collation collation)
	{
	return (collation > CharInfo::FIRST_SYS_COLLATION && collation < CharInfo::LAST_SYS_COLLATION);
	}

	static short getCollationParamsIndex(const CharInfo::Collation collation)
	{
	return (collation - CharInfo::FIRST_SYS_COLLATION -1);
	}

	inline static short getCollationNPasses(const CharInfo::Collation collation)
	{
	return collationNPasses[getCollationParamsIndex( collation) ];
	}

	inline static unsigned char getCollationMaxChar(const CharInfo::Collation collation)
	{
	return collationMaxChar[getCollationParamsIndex( collation) ];
	}




	private:

	void setFlags(CollationFlags f) { flags_ \|= f; }
	void clrFlags(CollationFlags f) { flags_ &= ~f; }

	// See ComMPLoc::getMPName(size_t *)
	// and QualifiedName::getQualifiedNameAsAnsiString(size_t *)
	// -- an MP name can have up to 4 parts (MAX_NAME_PARTS),
	// and an Ansi name up to 3.
	// Hence, here we have:
	// - one fixed string for the whole name,
	// - the length of the whole,
	// and, where the caller (CollationDB::insert methods)
	// has determined that valid synonyms exist based on current defaults,
	// - offsets into the string pointing to those synonyms
	// (an offset value of zero means no synonym!).
	//
	// E.g., for name "\X.$Y.Z.W", if default MPLoc was "\X.$Y.DIFFSV",
	// 012345678
	// here the namelen would of course be 9,
	// and the three offsets would be 3, 6, and 0.
	// Put another way, the string starting at name_[3] is "$Y.Z.W",
	// the string at name_[6] is "Z.W",
	// both of which are equivalent to the full name, GIVEN THE DEFAULTS.
	// The string at name_[8] is "W", which by the given defaults
	// is equivalent to the different full name "\X.$Y.DIFFSV.W";
	// hence the third offset is 0 in this example.
	//
	CharInfo::Collation co_;
	Int32 flags_;
	const char* name_;
	size_t namelen_;
	size_t synonymOffset_[OFFSETARRAY_SIZE];

	}; // CollationInfo


	typedef LIST(const CollationInfo *) CollationDBSupertype;
	typedef LIST(CollationDB *) CollationDBList;

	// CollationDB: a list of CollationInfo entries, implemented by private
	// derivation from LIST(const CollationInfo *).
	class CollationDB : private CollationDBSupertype
	{
	public:

	  CollationDB(CollHeap *h);

	  // Pointer parameters restored: the '*' tokens had been stripped from
	  // this declaration.
	  CollationDB(CollHeap *h, const CollationInfo *co, size_t count);

	  ~CollationDB();

	  // Deletes every entry, walking from the last slot down to the first and
	  // nulling each slot as it goes, then empties the list.
	  void clearAndReset()
	  {
	    for (CollIndex i = entries(); i--; ) {
	      delete at(i);
	      at(i) = NULL; // (just in case)
	    }
	    clear();
	  }

	  // for debugging only
	  void display() const;
	  static void Display();

	  // Read/write access to the refresh-needed flag.
	  NABoolean &refreshNeeded() { return refreshNeeded_; }

	  // The insert methods assume you've already done an unsuccessful lookup.
	  //
	  // They return a newly generated CharInfo::Collation value for the
	  // user-defined collation just inserted.

	  CharInfo::Collation insert(ComMPLoc &loc,
	                             const ComMPLoc *defaultMPLoc,
	                             CollationInfo::CollationFlags
	                               flags=CollationInfo::NO_FLAGS);

	  CharInfo::Collation insert(QualifiedName &qn,
	                             const SchemaName *defaultSchema,
	                             CollationInfo::CollationFlags
	                               flags=CollationInfo::NO_FLAGS);

	private:
	  friend class CharInfo; // its static funx should be the only callers of these:

	  CharInfo::Collation insert(const char *nam,
	                             size_t *sizArray, /* array[SIZEARRAY_SIZE] */
	                             CollationInfo::CollationFlags flags,
	                             Int32 defaultMatchCount);
	  inline
	  CollationDB * nextCDB() const;

	  const CollationInfo * getCollationInfo(CharInfo::Collation co) const;

	  CharInfo::Collation getCollationEnum(const char *name,
	                                       NABoolean formatNSK,
	                                       size_t namlen) const;
	  const char * getCollationName(CharInfo::Collation co,
	                                NABoolean retUnkAsBlank) const;

	  Int32 getCollationFlags(CharInfo::Collation co) const;

	  // data members
	  CollHeap * heap_;
	  NABoolean refreshNeeded_;

	  static Lng32 nextUserCo_;
	}; // CollationDB


	#endif //CHARINFO_H