blob: f5a58774548e41af93a489bfef77ece36f2beb91 [file] [log] [blame]
/** \file cp2ucnvrt.hpp .
-----------------------------------------------------------------------------
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
-----------------------------------------------------------------------------
\brief Contains CodePage2UnicodeConverter, a class to convert from different codepages to UNICODE
Description:
-----------------------------------------------------------------------------
5/21/1999 Initial creation
8/18/1999 CodePage2UTF8Converter added
-------------------------------------------------------------------------- */
#ifndef UIMA_CP2UCNVRT_HPP
#define UIMA_CP2UCNVRT_HPP
/* ----------------------------------------------------------------------- */
/* Include dependencies */
/* ----------------------------------------------------------------------- */
#include "uima/pragmas.hpp" // must be first file to be included to get pragmas
#include "uima/exceptions.hpp"
#include "unicode/ucnv.h"
/* ----------------------------------------------------------------------- */
/* Constants */
/* ----------------------------------------------------------------------- */
/* ----------------------------------------------------------------------- */
/* Forward declarations */
/* ----------------------------------------------------------------------- */
/* ----------------------------------------------------------------------- */
/* Types / Classes */
/* ----------------------------------------------------------------------- */
namespace uima {
/**
* The class <TT>CodePage2UnicodeConverter</TT> converts all characters
* in a specified buffer into UNICODE (CCSID: CCSID::EnCCSID_UCS2)
*
* @see Unicode2CodePageConverter CodePage2UTF8Converter
*/
class UIMA_LINK_IMPORTSPEC CodePage2UnicodeConverter {
public:
/** @name Constructors */
/*@{*/
/** Create an instance for a converter from CCSID <TT>crclCCSID</TT> to
CCSID CCSID::EnCCSID_UCS2. */
CodePage2UnicodeConverter(const char * crConverterName);
~CodePage2UnicodeConverter();
/** TODO: fix
* implementation of convertBytes is probably wrong.
* used in taf_doc_buffer.cpp and taf_htmlparser.cpp to support test suite
*
*/
/*@}*/
/** @name Miscellaneous */
/*@{*/
/** From this CCSID, convert the specified source buffer <TT>cpacSource</TT> with size
<TT>uiSourceSize</TT> in bytes, to target buffer <TT>pclTarget</TT>
with size <TT>uiTargetMaxSize</TT> in bytes to the UNICODE
CCSID (CCSID::EnCCSID_UCS2), and return
the number of bytes converted. */
size_t convertBytes(UChar * pclTarget, size_t uiTargetMaxSize, const char * cpacSource, size_t uiSourceSize) ;
/*@}*/
/** the maximum size in bytes required when converted to Unicode
using this code page converter */
size_t getMaximumLength(const char * cpacSource, size_t uiSourceLength) const;
/** Using this code page converter, convert the specified source buffer
<TT>cpacSource</TT> with size <TT>uiSourceSize</TT> in bytes, to target
buffer <TT>pclTarget</TT> with size <TT>uiTargetMaxSize</TT> in bytes to
UNICODE and return the number of unicode characters converted. */
size_t convertToUnicode(UChar * pclTarget, size_t uiTargetMaxLength,
const char * cpacSource, size_t uiSourceLength);
/*@}*/
protected:
/* --- functions --- */
private:
/* --- functions --- */
/* BASE CONSTRUCTOR NOT SUPPORTED */
CodePage2UnicodeConverter(void); //lint !e1704
/* COPY CONSTRUCTOR NOT SUPPORTED */
CodePage2UnicodeConverter(const CodePage2UnicodeConverter & ); //lint !e1704
/* ASSIGNMENT OPERATOR NOT SUPPORTED */
CodePage2UnicodeConverter & operator=(const CodePage2UnicodeConverter & crclObject);
/* --- variables --- */
UConverter * iv_uconverter;
}
; /* CodePage2UnicodeConverter */
}
/* ----------------------------------------------------------------------- */
/* Implementation */
/* ----------------------------------------------------------------------- */
/* ----------------------------------------------------------------------- */
#endif /* UIMA_CP2UCNVRT_HPP */
/* <EOF> */