blob: ac58d3f1453c16d5c54111b65f65a488a097d2b4 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <log4cxx/logstring.h>
#include <log4cxx/helpers/transcoder.h>
#include <log4cxx/helpers/pool.h>
#include <stdlib.h>
#include <log4cxx/helpers/exception.h>
#include <log4cxx/helpers/bytebuffer.h>
#include <log4cxx/helpers/charsetdecoder.h>
#include <log4cxx/helpers/charsetencoder.h>
#include <vector>
#include <apr.h>
#include <apr_strings.h>
#if !defined(LOG4CXX)
#define LOG4CXX 1
#endif
#include <log4cxx/private/log4cxx_private.h>
#if LOG4CXX_LOGCHAR_IS_UNICHAR || LOG4CXX_CFSTRING_API || LOG4CXX_UNICHAR_API
#include <CoreFoundation/CFString.h>
#endif
using namespace log4cxx;
using namespace log4cxx::helpers;
/**
 * Decodes the UTF-8 bytes in src one scalar value at a time and appends
 * each value to dst through encode().
 *
 * Every position for which decode() reports 0xFFFF contributes a single
 * LOSSCHAR to dst instead.
 *
 * @param src UTF-8 encoded input.
 * @param dst string the converted text is appended to.
 */
void Transcoder::decodeUTF8(const std::string& src, LogString& dst)
{
    std::string::const_iterator iter = src.begin();

    while (iter != src.end())
    {
        const std::string::const_iterator start(iter);
        const unsigned int sv = decode(src, iter);

        if (sv != 0xFFFF)
        {
            encode(sv, dst);
            continue;
        }

        dst.append(1, LOSSCHAR);

        // decode() yields 0xFFFF both for a rejected sequence (iterator put
        // back to where it started) and for a well-formed encoding of
        // U+FFFF (iterator already moved past it).  Step manually only in
        // the first case; stepping in the second would drop the following
        // character or move the iterator beyond end().
        if (iter == start)
        {
            ++iter;
        }
    }
}
void Transcoder::encodeUTF8(const LogString& src, std::string& dst)
{
#if LOG4CXX_LOGCHAR_IS_UTF8
dst.append(src);
#else
LogString::const_iterator iter = src.begin();
while (iter != src.end())
{
unsigned int sv = decode(src, iter);
if (sv != 0xFFFF)
{
encode(sv, dst);
}
else
{
dst.append(1, LOSSCHAR);
iter++;
}
}
#endif
}
/**
 * Produces a UTF-8 C string for src, duplicated through p.pstrdup().
 *
 * @param src text to convert.
 * @param p   pool whose pstrdup() creates the returned copy.
 * @return the value returned by p.pstrdup() for the UTF-8 text.
 */
char* Transcoder::encodeUTF8(const LogString& src, Pool& p)
{
#if LOG4CXX_LOGCHAR_IS_UTF8
    // logchar text is already UTF-8: duplicate it directly.
    return p.pstrdup(src);
#else
    // Convert into a temporary first, then hand the bytes to the pool.
    std::string utf8;
    encodeUTF8(src, utf8);
    return p.pstrdup(utf8);
#endif
}
/**
 * Writes the UTF-8 encoding of sv at the buffer's current location and
 * moves the buffer position forward by the number of bytes written.
 *
 * No remaining-space check is made in this function.
 *
 * @param sv  scalar value to encode.
 * @param dst buffer that receives the bytes.
 */
void Transcoder::encodeUTF8(unsigned int sv, ByteBuffer& dst)
{
    const size_t written = encodeUTF8(sv, dst.current());
    dst.position(dst.position() + written);
}
/**
 * Encodes one scalar value as UTF-8 into a raw character buffer.
 *
 * Values above 0x10FFFF are written as the three-byte encoding of 0xFFFF.
 *
 * @param ch  scalar value to encode.
 * @param dst destination buffer; up to four bytes are written and no
 *            terminator is added.
 * @return number of bytes stored in dst (1 to 4).
 */
size_t Transcoder::encodeUTF8(unsigned int ch, char* dst)
{
    // One byte: 0xxxxxxx
    if (ch < 0x80)
    {
        dst[0] = static_cast<char>(ch);
        return 1;
    }

    // Two bytes: 110xxxxx 10xxxxxx
    if (ch < 0x800)
    {
        dst[0] = static_cast<char>(0xC0 | (ch >> 6));
        dst[1] = static_cast<char>(0x80 | (ch & 0x3F));
        return 2;
    }

    // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
    if (ch < 0x10000)
    {
        dst[0] = static_cast<char>(0xE0 | (ch >> 12));
        dst[1] = static_cast<char>(0x80 | ((ch >> 6) & 0x3F));
        dst[2] = static_cast<char>(0x80 | (ch & 0x3F));
        return 3;
    }

    // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    if (ch <= 0x10FFFF)
    {
        dst[0] = static_cast<char>(0xF0 | (ch >> 18));
        dst[1] = static_cast<char>(0x80 | ((ch >> 12) & 0x3F));
        dst[2] = static_cast<char>(0x80 | ((ch >> 6) & 0x3F));
        dst[3] = static_cast<char>(0x80 | (ch & 0x3F));
        return 4;
    }

    // Out of range: emit the UTF-8 encoding of 0xFFFF.
    dst[0] = static_cast<char>(0xEF);
    dst[1] = static_cast<char>(0xBF);
    dst[2] = static_cast<char>(0xBF);
    return 3;
}
/**
 * Writes the big-endian UTF-16 encoding of sv at the buffer's current
 * location and moves the buffer position forward by the bytes written.
 *
 * No remaining-space check is made in this function.
 *
 * @param sv  scalar value to encode.
 * @param dst buffer that receives the bytes.
 */
void Transcoder::encodeUTF16BE(unsigned int sv, ByteBuffer& dst)
{
    const size_t written = encodeUTF16BE(sv, dst.current());
    dst.position(dst.position() + written);
}
/**
 * Encodes one scalar value as big-endian UTF-16 into a raw buffer.
 *
 * Values up to 0xFFFF are stored as one 16-bit unit, values up to 0x10FFFF
 * as a surrogate pair.  Anything larger is stored as the single unit 0xFFFF.
 *
 * @param ch  scalar value to encode.
 * @param dst destination buffer; two or four bytes are written.
 * @return number of bytes stored in dst (2 or 4).
 */
size_t Transcoder::encodeUTF16BE(unsigned int ch, char* dst)
{
    if (ch <= 0xFFFF)
    {
        dst[0] = (char) (ch >> 8);
        dst[1] = (char) (ch & 0xFF);
        return 2;
    }

    if (ch <= 0x10FFFF)
    {
        // Split the 20-bit offset above 0x10000 into two 10-bit halves.
        // The previous code took the top two bits of the low surrogate
        // from bits 4-5 of ch instead of bits 8-9, corrupting the pair.
        const unsigned int offset = ch - 0x10000;
        const unsigned int high = 0xD800 + (offset >> 10);
        const unsigned int low = 0xDC00 + (offset & 0x3FF);
        dst[0] = (char) (high >> 8);
        dst[1] = (char) (high & 0xFF);
        dst[2] = (char) (low >> 8);
        dst[3] = (char) (low & 0xFF);
        return 4;
    }

    // Not a Unicode scalar value: store 0xFFFF.
    dst[0] = dst[1] = (char) 0xFF;
    return 2;
}
/**
 * Writes the little-endian UTF-16 encoding of sv at the buffer's current
 * location and moves the buffer position forward by the bytes written.
 *
 * No remaining-space check is made in this function.
 *
 * @param sv  scalar value to encode.
 * @param dst buffer that receives the bytes.
 */
void Transcoder::encodeUTF16LE(unsigned int sv, ByteBuffer& dst)
{
    const size_t written = encodeUTF16LE(sv, dst.current());
    dst.position(dst.position() + written);
}
/**
 * Encodes one scalar value as little-endian UTF-16 into a raw buffer.
 *
 * Values up to 0xFFFF are stored as one 16-bit unit, values up to 0x10FFFF
 * as a surrogate pair.  Anything larger is stored as the single unit 0xFFFF.
 *
 * @param ch  scalar value to encode.
 * @param dst destination buffer; two or four bytes are written.
 * @return number of bytes stored in dst (2 or 4).
 */
size_t Transcoder::encodeUTF16LE(unsigned int ch, char* dst)
{
    if (ch <= 0xFFFF)
    {
        dst[0] = (char) (ch & 0xFF);
        dst[1] = (char) (ch >> 8);
        return 2;
    }

    if (ch <= 0x10FFFF)
    {
        // Split the 20-bit offset above 0x10000 into two 10-bit halves.
        // The previous code took the top two bits of the low surrogate
        // from bits 4-5 of ch instead of bits 8-9, corrupting the pair.
        const unsigned int offset = ch - 0x10000;
        const unsigned int high = 0xD800 + (offset >> 10);
        const unsigned int low = 0xDC00 + (offset & 0x3FF);
        dst[0] = (char) (high & 0xFF);
        dst[1] = (char) (high >> 8);
        dst[2] = (char) (low & 0xFF);
        dst[3] = (char) (low >> 8);
        return 4;
    }

    // Not a Unicode scalar value: store 0xFFFF.
    dst[0] = dst[1] = (char) 0xFF;
    return 2;
}
/**
 * Decodes one UTF-8 sequence starting at iter.
 *
 * On success the scalar value is returned and iter is left just past the
 * sequence.  On failure (stray continuation byte, truncated sequence,
 * missing continuation byte, overlong form) iter is restored to its
 * original position and 0xFFFF is returned.
 *
 * NOTE(review): a well-formed three-byte encoding of U+FFFF also returns
 * 0xFFFF, but with iter advanced; callers that need to tell the two cases
 * apart have to compare iter against its starting value.
 * NOTE(review): in the four-byte branch the lead byte pattern and the
 * 0x10FFFF upper limit are not checked here.
 *
 * @param src  string being decoded; only used to detect its end.
 * @param iter position of the lead byte; must not equal src.end().
 * @return decoded scalar value, or 0xFFFF.
 */
unsigned int Transcoder::decode(const std::string& src,
    std::string::const_iterator& iter)
{
    const std::string::const_iterator start(iter);
    const unsigned char ch1 = *(iter++);

    // Plain ASCII.
    if (ch1 <= 0x7F)
    {
        return ch1;
    }

    // A continuation byte cannot start a sequence, and a lead byte needs
    // at least one more byte after it.
    if ((ch1 & 0xC0) == 0x80 || iter == src.end())
    {
        iter = start;
        return 0xFFFF;
    }

    const unsigned char ch2 = *(iter++);

    if ((ch2 & 0xC0) != 0x80)
    {
        iter = start;
        return 0xFFFF;
    }

    // Two-byte sequence: 110xxxxx 10xxxxxx
    if ((ch1 & 0xE0) == 0xC0)
    {
        const unsigned int rv = ((ch1 & 0x1F) << 6) + (ch2 & 0x3F);

        // Values below 0x80 would be an overlong encoding.
        if (rv >= 0x80)
        {
            return rv;
        }

        iter = start;
        return 0xFFFF;
    }

    if (iter == src.end())
    {
        iter = start;
        return 0xFFFF;
    }

    const unsigned char ch3 = *(iter++);

    if ((ch3 & 0xC0) != 0x80)
    {
        iter = start;
        return 0xFFFF;
    }

    // Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
    if ((ch1 & 0xF0) == 0xE0)
    {
        const unsigned int rv = ((ch1 & 0x0F) << 12)
            + ((ch2 & 0x3F) << 6)
            + (ch3 & 0x3F);

        // Only values below 0x800 are overlong; U+0800 itself is the
        // smallest legitimate three-byte code point and must be accepted.
        if (rv < 0x800)
        {
            iter = start;
            return 0xFFFF;
        }

        return rv;
    }

    if (iter == src.end())
    {
        iter = start;
        return 0xFFFF;
    }

    const unsigned char ch4 = *(iter++);

    if ((ch4 & 0xC0) != 0x80)
    {
        iter = start;
        return 0xFFFF;
    }

    // Four-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    const unsigned int rv = ((ch1 & 0x07) << 18)
        + ((ch2 & 0x3F) << 12)
        + ((ch3 & 0x3F) << 6)
        + (ch4 & 0x3F);

    // Values up to 0xFFFF would be an overlong encoding.
    if (rv > 0xFFFF)
    {
        return rv;
    }

    iter = start;
    return 0xFFFF;
}
/**
 * Appends the UTF-8 encoding of a single scalar value to dst.
 *
 * @param sv  scalar value to encode.
 * @param dst string the bytes are appended to.
 */
void Transcoder::encode(unsigned int sv, std::string& dst)
{
    // Scratch space for the bytes produced by encodeUTF8().
    char utf8[8];
    const size_t count = encodeUTF8(sv, utf8);
    dst.append(utf8, count);
}
/**
 * Appends src, interpreted in the default character set, to dst.
 *
 * When both the character set and logchar are UTF-8 the bytes are appended
 * unchanged.  Otherwise a leading run of bytes below 0x80 is copied
 * directly (skipped for EBCDIC builds) and the rest is passed to the
 * default CharsetDecoder.  Each time the decoder reports an error one
 * LOSSCHAR is appended and one input byte is skipped.
 *
 * @param src bytes to decode.
 * @param dst string the decoded text is appended to.
 */
void Transcoder::decode(const std::string& src, LogString& dst)
{
#if LOG4CXX_CHARSET_UTF8 && LOG4CXX_LOGCHAR_IS_UTF8
    dst.append(src);
#else
    static CharsetDecoderPtr decoder(CharsetDecoder::getDefaultDecoder());
    dst.reserve(dst.size() + src.size());
    std::string::const_iterator pos = src.begin();
#if !LOG4CXX_CHARSET_EBCDIC

    // Copy the leading run of 7-bit bytes without involving the decoder.
    while (pos != src.end() && ((unsigned char) *pos) < 0x80)
    {
        dst.append(1, *pos);
        pos++;
    }

#endif

    if (pos == src.end())
    {
        return;
    }

    // Wrap the unread tail of src; the cast is only needed because
    // ByteBuffer takes a non-const pointer.
    const size_t consumed = pos - src.begin();
    ByteBuffer buf(const_cast<char*>(src.data() + consumed), src.size() - consumed);

    while (buf.remaining() > 0)
    {
        if (CharsetDecoder::isError(decoder->decode(buf, dst)))
        {
            dst.append(1, LOSSCHAR);
            buf.position(buf.position() + 1);
        }
    }

    // One more call with the exhausted buffer (presumably lets the decoder
    // finish any pending state; confirm against CharsetDecoder).
    decoder->decode(buf, dst);
#endif
}
/**
 * Produces a C string for src in the default character set, duplicated
 * through p.pstrdup().
 *
 * @param src text to convert.
 * @param p   pool whose pstrdup() creates the returned copy.
 * @return the value returned by p.pstrdup() for the encoded text.
 */
char* Transcoder::encode(const LogString& src, Pool& p)
{
#if LOG4CXX_CHARSET_UTF8 && LOG4CXX_LOGCHAR_IS_UTF8
    // Nothing to convert: duplicate the text directly.
    return p.pstrdup(src);
#else
    // Convert into a temporary first, then hand the bytes to the pool.
    std::string encoded;
    encode(src, encoded);
    return p.pstrdup(encoded);
#endif
}
void Transcoder::encode(const LogString& src, std::string& dst)
{
#if LOG4CXX_CHARSET_UTF8 && LOG4CXX_LOGCHAR_IS_UTF8
dst.append(src);
#else
static CharsetEncoderPtr encoder(CharsetEncoder::getDefaultEncoder());
dst.reserve(dst.size() + src.size());
LogString::const_iterator iter = src.begin();
#if !LOG4CXX_CHARSET_EBCDIC
for (;
iter != src.end() && ((unsigned int) *iter) < 0x80;
iter++)
{
dst.append(1, *iter);
}
#endif
if (iter != src.end())
{
char buf[BUFSIZE];
ByteBuffer out(buf, BUFSIZE);
while (iter != src.end())
{
log4cxx_status_t stat = encoder->encode(src, iter, out);
out.flip();
dst.append(out.data(), out.limit());
out.clear();
if (CharsetEncoder::isError(stat))
{
dst.append(1, LOSSCHAR);
iter++;
}
}
encoder->encode(src, iter, out);
}
#endif
}
/**
 * Decodes one UTF-16 scalar value starting at iter.
 *
 * - A unit outside the surrogate range is returned as is; iter moves one
 *   unit forward unless the unit is 0xFFFF.
 * - A high surrogate followed by a low surrogate yields the combined
 *   scalar value; iter moves two units forward.
 * - Anything else (unpaired surrogate) returns 0xFFFF and leaves iter
 *   where it was.
 *
 * @param in   string being decoded; only used to detect its end.
 * @param iter position of the unit to decode; must not equal in.end().
 * @return decoded scalar value, or 0xFFFF.
 */
template<class String, class Iterator>
static unsigned int decodeUTF16(const String& in, Iterator& iter)
{
    const unsigned int lead = *iter;
    const bool isSurrogate = (lead >= 0xD800 && lead <= 0xDFFF);

    if (!isSurrogate)
    {
        // 0xFFFF doubles as the failure marker, so it is never consumed.
        if (lead != 0xFFFF)
        {
            iter++;
        }

        return lead;
    }

    // A low surrogate cannot start a pair.
    if (lead >= 0xDC00)
    {
        return 0xFFFF;
    }

    // High surrogate: it needs a following unit ...
    Iterator next = iter + 1;

    if (next == in.end())
    {
        return 0xFFFF;
    }

    // ... and that unit has to be a low surrogate.
    const unsigned int trail = *next;

    if (trail < 0xDC00 || trail > 0xDFFF)
    {
        return 0xFFFF;
    }

    iter += 2;
    return (lead - 0xD800) * 0x400 + (trail - 0xDC00) + 0x10000;
}
/**
 * Appends the UTF-16 encoding of sv to dst.
 *
 * Values below 0x10000 are appended as a single unit; larger values are
 * appended as a high surrogate followed by a low surrogate.
 *
 * @param sv  scalar value to encode.
 * @param dst string of 16-bit (or wider) units that is appended to.
 */
template<class String>
static void encodeUTF16(unsigned int sv, String& dst)
{
    if (sv >= 0x10000)
    {
        // plane - 1 supplies the four bits stored above the six bits
        // taken from the top of the low 16 bits of sv.
        const unsigned char plane = (unsigned char) (sv >> 16);
        const unsigned char w = (unsigned char) (plane - 1);
        const unsigned short highSurrogate =
            (0xD800 + ((w & 0xF) << 6) + ((sv & 0xFFFF) >> 10));
        const unsigned short lowSurrogate = (0xDC00 + (sv & 0x3FF));
        dst.append(1, highSurrogate);
        dst.append(1, lowSurrogate);
        return;
    }

    dst.append(1, sv);
}
#if LOG4CXX_WCHAR_T_API || LOG4CXX_LOGCHAR_IS_WCHAR_T || defined(WIN32) || defined(_WIN32)
/**
 * Appends the wide-character text in src to dst.
 *
 * When logchar is wchar_t the text is appended unchanged.  Otherwise each
 * scalar value obtained from decode() is written with encode(), and every
 * position for which decode() reports 0xFFFF becomes one LOSSCHAR.
 *
 * @param src wide-character input.
 * @param dst string the converted text is appended to.
 */
void Transcoder::decode(const std::wstring& src, LogString& dst)
{
#if LOG4CXX_LOGCHAR_IS_WCHAR_T
    // The whole string is appended; the former second argument named a
    // variable that does not exist in this scope.
    dst.append(src);
#else
    std::wstring::const_iterator i = src.begin();

    while (i != src.end())
    {
        const std::wstring::const_iterator start(i);
        const unsigned int cp = decode(src, i);

        if (cp != 0xFFFF)
        {
            encode(cp, dst);
            continue;
        }

        dst.append(1, LOSSCHAR);

        // decode() may or may not have consumed the offending unit (the
        // __STDC_ISO_10646__ variant always advances).  Step manually only
        // when it did not, so nothing is skipped and end() is not passed.
        if (i == start)
        {
            ++i;
        }
    }

#endif
}
void Transcoder::encode(const LogString& src, std::wstring& dst)
{
#if LOG4CXX_LOGCHAR_IS_WCHAR_T
dst.append(src);
#else
for (LogString::const_iterator i = src.begin();
i != src.end();)
{
unsigned int cp = Transcoder::decode(src, i);
encode(cp, dst);
}
#endif
}
/**
 * Produces a NUL-terminated wide-character copy of src in memory obtained
 * from p.palloc().
 *
 * @param src text to convert.
 * @param p   pool whose palloc() provides the storage for the result.
 * @return pointer to the NUL-terminated wide string.
 */
wchar_t* Transcoder::wencode(const LogString& src, Pool& p)
{
#if LOG4CXX_LOGCHAR_IS_WCHAR_T
    // src is const, so the alias has to be a reference-to-const; a plain
    // std::wstring& cannot bind to it.
    const std::wstring& tmp = src;
#else
    std::wstring tmp;
    encode(src, tmp);
#endif
    const size_t length = tmp.length();
    wchar_t* dst = (wchar_t*) p.palloc((length + 1) * sizeof(wchar_t));
    // Terminate first, then copy the characters in front of the terminator.
    dst[length] = 0;
    memcpy(dst, tmp.data(), length * sizeof(wchar_t));
    return dst;
}
/**
 * Decodes one scalar value from a wide string.
 *
 * When __STDC_ISO_10646__ is defined the unit at iter is returned as is
 * and iter always moves one unit forward.  Otherwise the unit(s) are
 * interpreted as UTF-16 by decodeUTF16().
 *
 * @param in   string being decoded.
 * @param iter position of the unit to decode; must not equal in.end().
 * @return decoded scalar value.
 */
unsigned int Transcoder::decode(const std::wstring& in,
    std::wstring::const_iterator& iter)
{
#if defined(__STDC_ISO_10646__)
    const unsigned int scalar = *iter;
    ++iter;
    return scalar;
#else
    return decodeUTF16(in, iter);
#endif
}
/**
 * Appends one scalar value to a wide string.
 *
 * The value is appended as a single wchar_t when __STDC_ISO_10646__ is
 * defined or wchar_t is four bytes wide; otherwise it is appended as
 * UTF-16 through encodeUTF16().
 *
 * @param sv  scalar value to append.
 * @param dst wide string that is appended to.
 */
void Transcoder::encode(unsigned int sv, std::wstring& dst)
{
#if defined(__STDC_ISO_10646__)
    dst.append(1, sv);
#else

    // Narrow wchar_t: fall back to UTF-16 units.
    if (sizeof(wchar_t) != 4)
    {
        encodeUTF16(sv, dst);
        return;
    }

    dst.append(1, sv);
#endif
}
#endif
#if LOG4CXX_UNICHAR_API || LOG4CXX_CFSTRING_API
void Transcoder::decode(const std::basic_string<UniChar>& src, LogString& dst)
{
#if LOG4CXX_LOGCHAR_IS_UNICHAR
dst.append(src);
#else
for (std::basic_string<UniChar>::const_iterator i = src.begin();
i != src.end();)
{
unsigned int cp = decode(src, i);
encode(cp, dst);
}
#endif
}
void Transcoder::encode(const LogString& src, std::basic_string<UniChar>& dst)
{
#if LOG4CXX_LOGCHAR_IS_UNICHAR
dst.append(src);
#else
for (LogString::const_iterator i = src.begin();
i != src.end();)
{
unsigned int cp = decode(src, i);
encode(cp, dst);
}
#endif
}
/**
 * Decodes one scalar value from a UniChar string by treating it as UTF-16.
 *
 * @param in   string being decoded.
 * @param iter position of the unit to decode; must not equal in.end().
 * @return the result of decodeUTF16() for this position.
 */
unsigned int Transcoder::decode(const std::basic_string<UniChar>& in,
    std::basic_string<UniChar>::const_iterator& iter)
{
    const unsigned int scalar = decodeUTF16(in, iter);
    return scalar;
}
/**
 * Appends one scalar value to a UniChar string as UTF-16.
 *
 * @param sv  scalar value to append.
 * @param dst UniChar string that is appended to.
 */
void Transcoder::encode(unsigned int sv, std::basic_string<UniChar>& dst)
{
    encodeUTF16<std::basic_string<UniChar> >(sv, dst);
}
#endif
#if LOG4CXX_CFSTRING_API
/**
 * Appends the content of a CFString to dst.
 *
 * The characters are read directly when CFStringGetCharactersPtr() offers
 * a pointer to them and are copied out with CFStringGetCharacters()
 * otherwise.  An empty string appends nothing.
 *
 * @param src CFString to read.
 * @param dst string the converted text is appended to.
 */
void Transcoder::decode(const CFStringRef& src, LogString& dst)
{
    const size_t length = CFStringGetLength(src);

    if (length == 0)
    {
        return;
    }

    const UniChar* chars = CFStringGetCharactersPtr(src);

    if (chars)
    {
        // Pass the length explicitly: the buffer handed out by
        // CFStringGetCharactersPtr() is not documented to be
        // NUL-terminated, so it must not be scanned for a terminator.
        decode(std::basic_string<UniChar>(chars, length), dst);
    }
    else
    {
        std::vector<UniChar> tmp(length);
        CFStringGetCharacters(src, CFRangeMake(0, length), &tmp[0]);
#if LOG4CXX_LOGCHAR_IS_UNICHAR
        dst.append(&tmp[0], tmp.size());
#else
        decode(std::basic_string<UniChar>(&tmp[0], tmp.size()), dst);
#endif
    }
}
/**
 * Creates a CFString holding the content of src.
 *
 * NOTE(review): Core Foundation "Create" functions normally return a
 * reference the caller has to release; confirm against callers.
 *
 * @param src text to convert.
 * @return the CFString returned by CFStringCreateWithCharacters().
 */
CFStringRef Transcoder::encode(const LogString& src)
{
// Project macro; it introduces `tmp`, whose data()/size() are used below
// (presumably the UniChar form of src; see the macro definition).
LOG4CXX_ENCODE_UNICHAR(tmp, src);
return CFStringCreateWithCharacters(kCFAllocatorDefault, tmp.data(), tmp.size());
}
#endif
/**
 * Converts a single char to a logchar.
 *
 * On EBCDIC builds the character goes through the string decoder and the
 * first resulting logchar is returned; otherwise the value is returned
 * unchanged.
 *
 * @param val character to convert.
 * @return the corresponding logchar.
 */
logchar Transcoder::decode(char val)
{
#if LOG4CXX_CHARSET_EBCDIC
    LogString decoded;
    Transcoder::decode(std::string(1, val), decoded);
    return decoded[0];
#else
    return val;
#endif
}
/**
 * Converts a C string to a LogString.
 *
 * When logchar is UTF-8 and the build is not EBCDIC the LogString is
 * constructed directly from val; otherwise val is run through the string
 * decoder.
 *
 * @param val NUL-terminated input.
 * @return the converted text.
 */
LogString Transcoder::decode(const char* val)
{
#if LOG4CXX_LOGCHAR_IS_UTF8 && !LOG4CXX_CHARSET_EBCDIC
    return val;
#else
    LogString decoded;
    Transcoder::decode(val, decoded);
    return decoded;
#endif
}
std::string Transcoder::encodeCharsetName(const LogString& val)
{
char asciiTable[] = { ' ', '!', '"', '#', '$', '%', '&', '\'', '(', ')', '*', '+', ',', '-', '.', '/',
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?',
'@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
'`', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~'
};
std::string out;
for (LogString::const_iterator iter = val.begin();
iter != val.end();
iter++)
{
if (*iter >= 0x20 && *iter < 0x7F)
{
out.append(1, asciiTable[*iter - 0x20]);
}
else
{
out.append(1, LOSSCHAR);
}
}
return out;
}