// core/sql/common/conversionISO88591.cpp - trafodion - Git at Google

 /**********************************************************************
 // @@@ START COPYRIGHT @@@
 //
 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.
 //
 // @@@ END COPYRIGHT @@@
 **********************************************************************/
 /* -*-C++-*-
  *****************************************************************************
  *
  * File:         conversionISO88591.cpp
  * RCS:          $Id:
  * Description:  The implementation of ISO88591 related conversion routines
  *
  *
  * Created:      7/8/98
  * Modified:     $ $Date: 1998/08/10 16:00:29 $ (GMT)
  * Language:     C++
  * Status:       $State: Exp $
  *
  *
  *
  *
  *****************************************************************************
  */


 // define MODULE_DEBUG when the module is to be debugged separately.
 // #define  MODULE_DEBUG

 #include "NLSConversion.h"
 #include "str.h"
 #include "csconvert.h"
 #include "charinfo.h"
 #include "nawstring.h"
 #ifdef MODULE_DEBUG
 #include "NLSConversion.cpp"
 #endif

 //
 // Conversions between ISO88591 and Unicode.
 //
 // The Unicode mapping available at http://www.unicode.org/Public/MAPPINGS/
 // ISO8859/8859-1.TXT or described in the Unicode Standard Version 3.0 is
 // used to construct the two routines.
 //
 // Note that the Microsoft Windows code page 1252 is a superset of ISO88591.
 //

 //
 // Widens each byte of an ISO88591 string to one NAWchar; the byte value is
 // used unchanged as the NAWchar value.
 //
 //   input         - source bytes; input.getStrLen() of them are read.
 //   heap          - forwarded to checkSpace().
 //   unicodeString - forwarded to checkSpace(), which yields the buffer to fill.
 //   addNullAtEnd  - when set, a 0 NAWchar is stored right after the last
 //                   converted character.
 //
 // Returns the filled buffer with its string length set to the input length,
 // or NULL when checkSpace() does not provide a buffer.
 //
 NAWcharBuf* ISO88591ToUnicode(const charBuf& input,
 	CollHeap *heap, NAWcharBuf*& unicodeString, NABoolean addNullAtEnd)
 {
    const Int32 srcLen = input.getStrLen();

    NAWcharBuf* result = checkSpace(heap, srcLen, unicodeString, addNullAtEnd);
    if ( result == NULL )
       return NULL;

    const unsigned char* src = input.data();
    NAWchar* dst = result->data();

    Int32 pos = 0;
    while ( pos < srcLen ) {
       dst[pos] = (NAWchar)(src[pos]);
       ++pos;
    }

    // NOTE(review): presumably checkSpace() reserved the extra slot because it
    // was given addNullAtEnd - confirm against its definition.
    if ( addNullAtEnd )
       dst[pos] = 0;

    result->setStrLen(srcLen);
    return result;
 }

 //
 // Narrows a NAWchar string to ISO88591, one byte per input NAWchar.
 //
 //   input                 - source NAWchars; input.getStrLen() of them are read.
 //   heap                  - forwarded to checkSpace(); also selects how a
 //                           buffer is released on the failure path below.
 //   iso88591String        - forwarded to checkSpace(), which yields the buffer
 //                           to fill.
 //   addNullAtEnd          - when set, a 0 byte is stored after the last
 //                           converted character.
 //   allowInvalidCodePoint - when set, any NAWchar above 0xFF is replaced by
 //                           '?'; otherwise such a NAWchar aborts the conversion.
 //
 // Returns the filled buffer with its string length set to the input length.
 // Returns NULL when checkSpace() provides no buffer, or when a NAWchar above
 // 0xFF is met while allowInvalidCodePoint is off.
 //
 charBuf* unicodeToISO88591(const NAWcharBuf& input,
 	CollHeap *heap, charBuf*& iso88591String,
         NABoolean addNullAtEnd, NABoolean allowInvalidCodePoint)
 {
    const Int32 srcLen = input.getStrLen();

    charBuf* result = checkSpace(heap, srcLen, iso88591String, addNullAtEnd);
    if ( result == NULL )
       return NULL;

    const NAWchar* src = input.data();
    unsigned char* dst = result->data();

    Int32 pos;
    for ( pos = 0; pos < srcLen; ++pos ) {
       const NAWchar unit = src[pos];

       if ( unit <= 0xFF ) {
          // Representable in ISO88591: keep the value as is.
          dst[pos] = (unsigned char)unit;
          continue;
       }

       if ( allowInvalidCodePoint ) {
          dst[pos] = '?';
          continue;
       }

       // Strict mode: give up. The buffer is released only when the caller
       // passed no buffer of its own (iso88591String is NULL).
       if ( iso88591String == NULL ) {
          if ( heap )
             NADELETE(result, charBuf, heap);
          else
             delete result;
       }
       return NULL;
    }

    if ( addNullAtEnd )
       dst[pos] = 0;

    result->setStrLen(srcLen);
    return result;
 }

 //
 // Conversions between cset and Unicode.   (wrapper for new calls)
 //
 extern cnv_charset convertCharsetEnum (Int32 inset);

 //
 // Converts a string in character set 'cset' to NAWchars through LocaleToUTF16.
 //
 //   input         - source bytes; input.getStrLen() bytes are handed over.
 //   heap          - forwarded to checkSpace().
 //   unicodeString - forwarded to checkSpace(), which yields the target buffer.
 //   cset          - character set id, mapped with convertCharsetEnum().
 //   errorcode     - out: the LocaleToUTF16 result, with CNV_ERR_NOINPUT mapped
 //                   to 0; CNV_ERR_BUFFER_OVERRUN when checkSpace() yields no
 //                   buffer.
 //   addNullAtEnd  - forwarded to checkSpace() and LocaleToUTF16.
 //   charCount     - optional out: the character count reported by
 //                   LocaleToUTF16.
 //   errorByteOff  - optional out: distance in bytes from the start of the
 //                   input to the pointer reported by LocaleToUTF16.
 //
 // Returns the target buffer (also when errorcode is non-zero) with its string
 // length set in NAWchars, or NULL when checkSpace() yields no buffer.
 //
 NAWcharBuf* csetToUnicode(const charBuf& input,
 	CollHeap *heap, NAWcharBuf*& unicodeString, Int32 cset, Int32 &errorcode,
         NABoolean addNullAtEnd, Int32 *charCount, Int32 *errorByteOff)
 {
      char * err_ptr = NULL;
      UInt32 byteCount = 0, lv_charCount = 0;
      enum cnv_charset cnvSet = convertCharsetEnum (cset);

      // in NAWchar elements for the worst case
      const UInt32 computedMaxBufSizeInNAWchars = (input.getStrLen()+1)*2;

      NAWcharBuf* output = checkSpace(heap, computedMaxBufSizeInNAWchars, unicodeString, addNullAtEnd);

      if ( output == NULL ) {errorcode = CNV_ERR_BUFFER_OVERRUN; return NULL;}

      NAWchar* target = output->data();

      errorcode = LocaleToUTF16(
           cnv_version1,
           (const char *)input.data(), input.getStrLen(),
           (const char *)target, output->getBufSize()*BYTES_PER_NAWCHAR /* in bytes */,
           cnvSet,
           err_ptr,
           &byteCount,
           0,
           addNullAtEnd,
           &lv_charCount);
      if (errorcode == CNV_ERR_NOINPUT) errorcode=0;  // empty string is OK
      if (errorByteOff) *errorByteOff = (Int32)(err_ptr - (char *)input.data());
      if (charCount)    *charCount    = (Int32)lv_charCount;
      // If errorcode != 0, LocaleToUTF16 will not add the NULL terminator
      if (errorcode == 0 && addNullAtEnd && byteCount > 0)
         {
           // Exclude the size (in bytes) of the NULL terminator from the byte count.
           if (byteCount > BYTES_PER_NAWCHAR)
             byteCount -= BYTES_PER_NAWCHAR;
           else
             byteCount = 0;
         }

      output->setStrLen/*in_NAWchar_s*/(byteCount/BYTES_PER_NAWCHAR); // excluding the NULL terminator

      return output;

 }

 //
 // Converts a NAWchar string to character set 'cset' through UTF16ToLocale.
 //
 // For non-empty input a trial conversion into a scratch buffer is run first,
 // only to learn the converted length; the real target is then sized with that
 // length. Otherwise the target is sized at maxBytesPerChar(cset) bytes per
 // input NAWchar.
 //
 //   input                 - source NAWchars.
 //   heap                  - used for the scratch buffer and forwarded to
 //                           checkSpace().
 //   csetString            - forwarded to checkSpace(), which yields the target.
 //   cset                  - character set id, mapped with convertCharsetEnum().
 //   errorcode             - out: the UTF16ToLocale result of the final
 //                           conversion, with CNV_ERR_NOINPUT mapped to 0;
 //                           CNV_ERR_BUFFER_OVERRUN when checkSpace() yields no
 //                           buffer.
 //   addNullAtEnd          - forwarded to checkSpace() and UTF16ToLocale.
 //   allowInvalidCodePoint - forwarded to UTF16ToLocale.
 //   charCount             - optional out: character count reported by
 //                           UTF16ToLocale.
 //   errorByteOff          - optional out: distance in bytes from the start of
 //                           the input to the pointer reported by UTF16ToLocale.
 //
 // Returns the target buffer (also when errorcode is non-zero) with its string
 // length set in bytes, or 0 when checkSpace() yields no buffer.
 //
 charBuf* unicodeTocset(const NAWcharBuf& input,
 	CollHeap *heap, charBuf*& csetString, Int32 cset, Int32 &errorcode,
         NABoolean addNullAtEnd, NABoolean allowInvalidCodePoint,
         Int32 *charCount, Int32 *errorByteOff)
 {
    // Initialized so that the values reported to the caller below are well
    // defined even if UTF16ToLocale returns without writing its out arguments.
    char * err_ptr = NULL;
    UInt32 byteCount = 0, lvCharCount = 0;
    enum cnv_charset cnvSet = convertCharsetEnum (cset);
    Int32 cwidth = CharInfo::maxBytesPerChar((CharInfo::CharSet)cset);

    // Default sizing (the old behavior): cwidth bytes for every input NAWchar.
    Int32 targetLenInBytes = input.getStrLen()*cwidth;

    if ( input.data() != NULL && input.getStrLen() > 0)
    {
      Int32 cSetTargetBufferSizeInBytes = input.getStrLen/*in_NAWchars*/()*cwidth+16; // memory is cheap
      UInt32 cSetTargetStrLenInBytes = 0;
      char *pTempTargetBuf = new(heap) char[cSetTargetBufferSizeInBytes];
      errorcode = UTF16ToLocale ( cnv_version1
                                , (const char *)input.data()           // source string
                                , input.getStrLen()*BYTES_PER_NAWCHAR  // source string length in bytes
                                , (const char *)pTempTargetBuf         // buffer for the converted string
                                , cSetTargetBufferSizeInBytes          // target buffer size in bytes
                                , cnvSet                               // convert from UTF16 to cnvSet
                                , err_ptr
                                , &cSetTargetStrLenInBytes // out - length in bytes of the converted string
                                , 0
                                , addNullAtEnd
                                , allowInvalidCodePoint
                                );
      NADELETEBASICARRAY(pTempTargetBuf, heap); pTempTargetBuf = NULL;

      // Use the measured length only when the trial conversion succeeded and
      // produced something; otherwise keep the default sizing.
      if (errorcode == 0 && cSetTargetStrLenInBytes > 0)
        targetLenInBytes = (Int32)cSetTargetStrLenInBytes;
    }

    charBuf* output = checkSpace(heap, targetLenInBytes, csetString, addNullAtEnd);

    if ( output == 0 ) {errorcode = CNV_ERR_BUFFER_OVERRUN; return 0;}

    unsigned char* target = output->data();

    errorcode =   UTF16ToLocale( cnv_version1,
                     (const char *)input.data(), input.getStrLen()*BYTES_PER_NAWCHAR,
                     (const char *)target, output->getBufSize(),
                     cnvSet ,
                     err_ptr,
                     &byteCount ,
                     0,
                     addNullAtEnd,
                     allowInvalidCodePoint,
                     &lvCharCount);
    if (errorcode == CNV_ERR_NOINPUT)
      errorcode=0;  // empty string is OK
    if (errorByteOff)
      *errorByteOff = (Int32)(err_ptr - (char *)input.data());
    if (charCount)
      *charCount    = (Int32)lvCharCount;

    // The terminator is only discounted after a successful conversion.
    if (errorcode == 0 && addNullAtEnd && byteCount > 0)
      {
        // Exclude the size (in bytes) of the NULL terminator from the byte count.
        UInt32 nullLen = CharInfo::minBytesPerChar((CharInfo::CharSet) cset);

        if (byteCount > nullLen)
          byteCount -= nullLen;
        else
          byteCount = 0;
      }

    output -> setStrLen(byteCount/*in_bytes*/); // excluding the null terminator from the count
    return output;
 }


 #ifdef MODULE_DEBUG
 // Stand-alone self test, built only when MODULE_DEBUG is defined.
 //
 // Round-trips every value 0x00..0xFF (inclusive) through unicodeToISO88591
 // and ISO88591ToUnicode, then checks that a NAWchar above 0xFF is rejected
 // when invalid code points are not allowed.
 //
 // Returns 0 when every check passes and 1 on the first failure.
 Int32 main(Int32 argc, char** argv)
 {
       NAWchar wbuf[1];
       NAWcharBuf uni(wbuf, 1);
       charBuf* latin1 = 0;

       // An Int32 counter lets the loop include 0xFF and still terminate.
       for ( Int32 cp = 0; cp <= 0xFF; cp++ ) {
          wbuf[0] = (NAWchar)cp;
          latin1 = unicodeToISO88591(uni, 0, latin1);
          // No result at all is a failure too, not a silent pass.
          if ( latin1 == 0 || latin1->data()[0] != cp ) {
                printf("u2l1 test failed\n");
                return 1;
          }
       }

       unsigned char buf[1];
       charBuf ascii(buf, 1);
       NAWcharBuf* unicode = 0;

       for ( Int32 b = 0; b <= 0xFF; b++ ) {
          buf[0] = (unsigned char)b;
          unicode = ISO88591ToUnicode(ascii, 0, unicode);

          if ( unicode == 0 || unicode->data()[0] != b ) {
                printf("l12u test failed\n");
                return 1;
             }
       }

       // Negative test. The flags are spelled out so the check does not depend
       // on the defaults declared elsewhere: no terminator, strict mode. The
       // result goes into its own variable so latin1 keeps its buffer.
       wbuf[0] = 0xC0F3;
       charBuf* rejected = unicodeToISO88591(uni, 0, latin1, FALSE, FALSE);
       if ( rejected ) {
          printf("negative u2l1 test failed\n");
          return 1;
       }

       printf("test pass\n");

      return 0;
 }

 #endif
	/**********************************************************************
	// @@@ START COPYRIGHT @@@
	//
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.
	//
	// @@@ END COPYRIGHT @@@
	**********************************************************************/
	/* -*-C++-*-
	*****************************************************************************
	*
	* File: conversionISO88591.cpp
	* RCS: $Id:
	* Description: The implementation of ISO88591 related conversion routines
	*
	*
	* Created: 7/8/98
	* Modified: $ $Date: 1998/08/10 16:00:29 $ (GMT)
	* Language: C++
	* Status: $State: Exp $
	*
	*
	*
	*
	*****************************************************************************
	*/



	// define MODULE_DEBUG when the module is to be debugged separately.
	// #define MODULE_DEBUG

	#include "NLSConversion.h"
	#include "str.h"
	#include "csconvert.h"
	#include "charinfo.h"
	#include "nawstring.h"
	#ifdef MODULE_DEBUG
	#include "NLSConversion.cpp"
	#endif

	//
	// Conversions between ISO88591 and Unicode.
	//
	// The Unicode mapping available at http://www.unicode.org/Public/MAPPINGS/
	// ISO8859/8859-1.TXT or described in the Unicode Standard Version 3.0 is
	// used to construct the two routines.
	//
	// Note that the Microsoft Windows code page 1252 is a superset of ISO88591.
	//

	//
	// Widens each byte of an ISO88591 string to one NAWchar; the byte value is
	// used unchanged as the NAWchar value.
	//
	//   input         - source bytes; input.getStrLen() of them are read.
	//   heap          - forwarded to checkSpace().
	//   unicodeString - forwarded to checkSpace(), which yields the buffer to fill.
	//   addNullAtEnd  - when set, a 0 NAWchar is stored right after the last
	//                   converted character.
	//
	// Returns the filled buffer with its string length set to the input length,
	// or 0 when checkSpace() does not provide a buffer.
	//
	NAWcharBuf* ISO88591ToUnicode(const charBuf& input,
	        CollHeap *heap, NAWcharBuf*& unicodeString, NABoolean addNullAtEnd)
	{
	   NAWcharBuf* output = checkSpace(heap, input.getStrLen(), unicodeString, addNullAtEnd);

	   if ( output == 0 ) return 0;

	   NAWchar* target = output->data();

	   Int32 i;
	   for ( i=0; i<input.getStrLen(); i++ ) {
	      target[i] = (NAWchar)(input.data()[i]);
	   }

	   // NOTE(review): presumably checkSpace() reserved the extra slot because it
	   // was given addNullAtEnd - confirm against its definition.
	   if ( addNullAtEnd )
	      target[i] = 0;

	   output->setStrLen(input.getStrLen());
	   return output;

	}

	//
	// Narrows a NAWchar string to ISO88591, one byte per input NAWchar.
	//
	//   input                 - source NAWchars; input.getStrLen() of them are read.
	//   heap                  - forwarded to checkSpace(); also selects how a
	//                           buffer is released on the failure path below.
	//   iso88591String        - forwarded to checkSpace(), which yields the buffer
	//                           to fill.
	//   addNullAtEnd          - when set, a 0 byte is stored after the last
	//                           converted character.
	//   allowInvalidCodePoint - when set, any NAWchar above 0xFF is replaced by
	//                           '?'; otherwise such a NAWchar aborts the conversion.
	//
	// Returns the filled buffer with its string length set to the input length.
	// Returns NULL when checkSpace() provides no buffer, or when a NAWchar above
	// 0xFF is met while allowInvalidCodePoint is off.
	//
	charBuf* unicodeToISO88591(const NAWcharBuf& input,
	        CollHeap *heap, charBuf*& iso88591String,
	        NABoolean addNullAtEnd, NABoolean allowInvalidCodePoint)
	{
	   charBuf* output = checkSpace(heap, input.getStrLen(), iso88591String, addNullAtEnd);

	   if ( output == 0 ) return 0;

	   unsigned char* target = output->data();

	   Int32 i;
	   for ( i=0; i<input.getStrLen(); i++ ) {
	      if ( input.data()[i] > 0xFF ) {
	         if ( allowInvalidCodePoint )
	           target[i] = '?';
	         else {
	           // Strict mode: give up. The buffer is released only when the
	           // caller passed no buffer of its own (iso88591String is NULL).
	           if ( iso88591String == NULL ) {
	              if (heap)
	                NADELETE(output, charBuf, heap);
	              else
	                delete output;
	           }
	           return NULL;
	         }
	      }  else
	         target[i] = (unsigned char)(input.data()[i]);
	   }

	   if ( addNullAtEnd )
	      target[i] = 0;

	   output -> setStrLen(input.getStrLen());
	   return output;
	}

	//
	// Conversions between cset and Unicode. (wrapper for new calls)
	//
	extern cnv_charset convertCharsetEnum (Int32 inset);

	//
	// Converts a string in character set 'cset' to NAWchars through LocaleToUTF16.
	//
	//   input         - source bytes; input.getStrLen() bytes are handed over.
	//   heap          - forwarded to checkSpace().
	//   unicodeString - forwarded to checkSpace(), which yields the target buffer.
	//   cset          - character set id, mapped with convertCharsetEnum().
	//   errorcode     - out: the LocaleToUTF16 result, with CNV_ERR_NOINPUT mapped
	//                   to 0; CNV_ERR_BUFFER_OVERRUN when checkSpace() yields no
	//                   buffer.
	//   addNullAtEnd  - forwarded to checkSpace() and LocaleToUTF16.
	//   charCount     - optional out: the character count reported by
	//                   LocaleToUTF16.
	//   errorByteOff  - optional out: distance in bytes from the start of the
	//                   input to the pointer reported by LocaleToUTF16.
	//
	// Returns the target buffer (also when errorcode is non-zero) with its string
	// length set in NAWchars, or NULL when checkSpace() yields no buffer.
	//
	NAWcharBuf* csetToUnicode(const charBuf& input,
	        CollHeap *heap, NAWcharBuf*& unicodeString, Int32 cset, Int32 &errorcode,
	        NABoolean addNullAtEnd, Int32 *charCount, Int32 *errorByteOff)
	{
	     char * err_ptr = NULL;
	     UInt32 byteCount = 0, lv_charCount = 0;
	     enum cnv_charset cnvSet = convertCharsetEnum (cset);

	     // in NAWchar elements for the worst case
	     const UInt32 computedMaxBufSizeInNAWchars = (input.getStrLen()+1)*2;

	     NAWcharBuf* output = checkSpace(heap, computedMaxBufSizeInNAWchars, unicodeString, addNullAtEnd);

	     if ( output == NULL ) {errorcode = CNV_ERR_BUFFER_OVERRUN; return NULL;}

	     NAWchar* target = output->data();

	     errorcode = LocaleToUTF16(
	          cnv_version1,
	          (const char *)input.data(), input.getStrLen(),
	          (const char *)target, output->getBufSize()*BYTES_PER_NAWCHAR /* in bytes */,
	          cnvSet,
	          err_ptr,
	          &byteCount,
	          0,
	          addNullAtEnd,
	          &lv_charCount);
	     if (errorcode == CNV_ERR_NOINPUT) errorcode=0; // empty string is OK
	     if (errorByteOff) *errorByteOff = (Int32)(err_ptr - (char *)input.data());
	     if (charCount)    *charCount    = (Int32)lv_charCount;
	     // If errorcode != 0, LocaleToUTF16 will not add the NULL terminator
	     if (errorcode == 0 && addNullAtEnd && byteCount > 0)
	        {
	          // Exclude the size (in bytes) of the NULL terminator from the byte count.
	          if (byteCount > BYTES_PER_NAWCHAR)
	            byteCount -= BYTES_PER_NAWCHAR;
	          else
	            byteCount = 0;
	        }

	     output->setStrLen/*in_NAWchar_s*/(byteCount/BYTES_PER_NAWCHAR); // excluding the NULL terminator

	     return output;

	}

	//
	// Converts a NAWchar string to character set 'cset' through UTF16ToLocale.
	//
	// For non-empty input a trial conversion into a scratch buffer is run first,
	// only to learn the converted length; the real target is then sized with that
	// length. Otherwise the target is sized at maxBytesPerChar(cset) bytes per
	// input NAWchar.
	//
	//   input                 - source NAWchars.
	//   heap                  - used for the scratch buffer and forwarded to
	//                           checkSpace().
	//   csetString            - forwarded to checkSpace(), which yields the target.
	//   cset                  - character set id, mapped with convertCharsetEnum().
	//   errorcode             - out: the UTF16ToLocale result of the final
	//                           conversion, with CNV_ERR_NOINPUT mapped to 0;
	//                           CNV_ERR_BUFFER_OVERRUN when checkSpace() yields no
	//                           buffer.
	//   addNullAtEnd          - forwarded to checkSpace() and UTF16ToLocale.
	//   allowInvalidCodePoint - forwarded to UTF16ToLocale.
	//   charCount             - optional out: character count reported by
	//                           UTF16ToLocale.
	//   errorByteOff          - optional out: distance in bytes from the start of
	//                           the input to the pointer reported by UTF16ToLocale.
	//
	// Returns the target buffer (also when errorcode is non-zero) with its string
	// length set in bytes, or 0 when checkSpace() yields no buffer.
	//
	charBuf* unicodeTocset(const NAWcharBuf& input,
	        CollHeap *heap, charBuf*& csetString, Int32 cset, Int32 &errorcode,
	        NABoolean addNullAtEnd, NABoolean allowInvalidCodePoint,
	        Int32 *charCount, Int32 *errorByteOff)
	{
	   // Initialized so that the values reported to the caller below are well
	   // defined even if UTF16ToLocale returns without writing its out arguments.
	   char * err_ptr = NULL;
	   UInt32 byteCount = 0, lvCharCount = 0;
	   enum cnv_charset cnvSet = convertCharsetEnum (cset);
	   Int32 cwidth = CharInfo::maxBytesPerChar((CharInfo::CharSet)cset);

	   // Default sizing (the old behavior): cwidth bytes for every input NAWchar.
	   Int32 targetLenInBytes = input.getStrLen()*cwidth;

	   if ( input.data() != NULL && input.getStrLen() > 0)
	   {
	     Int32 cSetTargetBufferSizeInBytes = input.getStrLen/*in_NAWchars*/()*cwidth+16; // memory is cheap
	     UInt32 cSetTargetStrLenInBytes = 0;
	     char *pTempTargetBuf = new(heap) char[cSetTargetBufferSizeInBytes];
	     errorcode = UTF16ToLocale ( cnv_version1
	                               , (const char *)input.data()           // source string
	                               , input.getStrLen()*BYTES_PER_NAWCHAR  // source string length in bytes
	                               , (const char *)pTempTargetBuf         // buffer for the converted string
	                               , cSetTargetBufferSizeInBytes          // target buffer size in bytes
	                               , cnvSet                               // convert from UTF16 to cnvSet
	                               , err_ptr
	                               , &cSetTargetStrLenInBytes // out - length in bytes of the converted string
	                               , 0
	                               , addNullAtEnd
	                               , allowInvalidCodePoint
	                               );
	     NADELETEBASICARRAY(pTempTargetBuf, heap); pTempTargetBuf = NULL;

	     // Use the measured length only when the trial conversion succeeded and
	     // produced something; otherwise keep the default sizing.
	     if (errorcode == 0 && cSetTargetStrLenInBytes > 0)
	       targetLenInBytes = (Int32)cSetTargetStrLenInBytes;
	   }

	   charBuf* output = checkSpace(heap, targetLenInBytes, csetString, addNullAtEnd);

	   if ( output == 0 ) {errorcode = CNV_ERR_BUFFER_OVERRUN; return 0;}

	   unsigned char* target = output->data();

	   errorcode = UTF16ToLocale( cnv_version1,
	                    (const char *)input.data(), input.getStrLen()*BYTES_PER_NAWCHAR,
	                    (const char *)target, output->getBufSize(),
	                    cnvSet ,
	                    err_ptr,
	                    &byteCount ,
	                    0,
	                    addNullAtEnd,
	                    allowInvalidCodePoint,
	                    &lvCharCount);
	   if (errorcode == CNV_ERR_NOINPUT)
	     errorcode=0; // empty string is OK
	   if (errorByteOff)
	     *errorByteOff = (Int32)(err_ptr - (char *)input.data());
	   if (charCount)
	     *charCount = (Int32)lvCharCount;

	   // The terminator is only discounted after a successful conversion.
	   if (errorcode == 0 && addNullAtEnd && byteCount > 0)
	     {
	       // Exclude the size (in bytes) of the NULL terminator from the byte count.
	       UInt32 nullLen = CharInfo::minBytesPerChar((CharInfo::CharSet) cset);

	       if (byteCount > nullLen)
	         byteCount -= nullLen;
	       else
	         byteCount = 0;
	     }

	   output -> setStrLen(byteCount/*in_bytes*/); // excluding the null terminator from the count
	   return output;
	}


	#ifdef MODULE_DEBUG
	// Stand-alone self test, built only when MODULE_DEBUG is defined.
	//
	// Round-trips every value 0x00..0xFF (inclusive) through unicodeToISO88591
	// and ISO88591ToUnicode, then checks that a NAWchar above 0xFF is rejected
	// when invalid code points are not allowed.
	//
	// Returns 0 when every check passes and 1 on the first failure.
	Int32 main(Int32 argc, char** argv)
	{
	      NAWchar wbuf[1];
	      NAWcharBuf uni(wbuf, 1);
	      charBuf* latin1 = 0;

	      // An Int32 counter lets the loop include 0xFF and still terminate.
	      for ( Int32 cp = 0; cp <= 0xFF; cp++ ) {
	         wbuf[0] = (NAWchar)cp;
	         latin1 = unicodeToISO88591(uni, 0, latin1);
	         // No result at all is a failure too, not a silent pass.
	         if ( latin1 == 0 || latin1->data()[0] != cp ) {
	               printf("u2l1 test failed\n");
	               return 1;
	         }
	      }

	      unsigned char buf[1];
	      charBuf ascii(buf, 1);
	      NAWcharBuf* unicode = 0;

	      for ( Int32 b = 0; b <= 0xFF; b++ ) {
	         buf[0] = (unsigned char)b;
	         unicode = ISO88591ToUnicode(ascii, 0, unicode);

	         if ( unicode == 0 || unicode->data()[0] != b ) {
	               printf("l12u test failed\n");
	               return 1;
	         }
	      }

	      // Negative test. The flags are spelled out so the check does not depend
	      // on the defaults declared elsewhere: no terminator, strict mode. The
	      // result goes into its own variable so latin1 keeps its buffer.
	      wbuf[0] = 0xC0F3;
	      charBuf* rejected = unicodeToISO88591(uni, 0, latin1, FALSE, FALSE);
	      if ( rejected ) {
	         printf("negative u2l1 test failed\n");
	         return 1;
	      }

	      printf("test pass\n");

	      return 0;
	}

	#endif