blob: 395ee754a7b8d0127cb910d7809817f884618a9d [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <log4cxx/logstring.h>
#include <log4cxx/helpers/charsetdecoder.h>
#include <log4cxx/helpers/bytebuffer.h>
#include <log4cxx/helpers/exception.h>
#include <log4cxx/helpers/mutex.h>
#include <log4cxx/helpers/synchronized.h>
#include <log4cxx/helpers/pool.h>
#include <apr_xlate.h>
#if !defined(LOG4CXX)
#define LOG4CXX 1
#endif
#include <log4cxx/private/log4cxx_private.h>
#include <locale.h>
#include <apr_portable.h>
#include <log4cxx/helpers/stringhelper.h>
#include <log4cxx/helpers/transcoder.h>
using namespace log4cxx;
using namespace log4cxx::helpers;
IMPLEMENT_LOG4CXX_OBJECT(CharsetDecoder)
namespace log4cxx
{
namespace helpers
{
#if APR_HAS_XLATE
/**
 * Decoder that turns bytes in an arbitrary named encoding into
 * LogString content by delegating to apr_xlate. Requires a real
 * iconv implementation, apr-iconv will crash in use.
 */
class APRCharsetDecoder : public CharsetDecoder
{
    public:
        /**
         * Opens an apr_xlate conversion from the named encoding to the
         * encoding selected by the LOG4CXX_LOGCHAR_IS_* configuration.
         * @param frompage name of source encoding.
         * @throws IllegalArgumentException if apr_xlate_open does not
         *         return APR_SUCCESS.
         */
        APRCharsetDecoder(const LogString& frompage) : pool(), mutex(pool)
        {
#if LOG4CXX_LOGCHAR_IS_WCHAR
            const char* topage = "WCHAR_T";
#endif
#if LOG4CXX_LOGCHAR_IS_UTF8
            const char* topage = "UTF-8";
#endif
#if LOG4CXX_LOGCHAR_IS_UNICHAR
            const char* topage = "UTF-16";
#endif
            const std::string sourceName(Transcoder::encodeCharsetName(frompage));
            const apr_status_t opened = apr_xlate_open(&convset,
                    topage,
                    sourceName.c_str(),
                    pool.getAPRPool());

            if (opened != APR_SUCCESS)
            {
                throw IllegalArgumentException(frompage);
            }
        }

        /**
         * Destructor. Performs no explicit cleanup of its own.
         */
        virtual ~APRCharsetDecoder()
        {
        }

        /**
         * Converts the unread bytes of a buffer and appends the result.
         *
         * When the buffer has nothing left, apr_xlate_conv_buffer is called
         * once with no input buffer and whatever it writes is appended.
         * Otherwise the input is converted in chunks of at most 256 logchars
         * until it is exhausted or a conversion call reports a status other
         * than APR_SUCCESS. The buffer position is advanced by the number of
         * bytes each call consumed.
         *
         * @param in source bytes; its position is updated.
         * @param out string that receives the converted characters.
         * @return status of the last apr_xlate_conv_buffer call, or
         *         APR_SUCCESS.
         */
        virtual log4cxx_status_t decode(ByteBuffer& in,
            LogString& out)
        {
            enum { CHUNK_CHARS = 256 };
            logchar chunk[CHUNK_CHARS];
            const apr_size_t chunkBytes = CHUNK_CHARS * sizeof(logchar);

            if (in.remaining() == 0)
            {
                // No input: call the converter with a NULL source only.
                apr_size_t spareBytes = chunkBytes;
                apr_status_t flushed = APR_SUCCESS;
                {
                    // Conversion calls on the shared handle are serialized.
                    synchronized sync(mutex);
                    flushed = apr_xlate_conv_buffer((apr_xlate_t*) convset,
                            NULL, NULL, (char*) chunk, &spareBytes);
                }
                out.append(chunk, (chunkBytes - spareBytes) / sizeof(logchar));
                return flushed;
            }

            apr_status_t result = APR_SUCCESS;

            while (result == APR_SUCCESS && in.remaining() > 0)
            {
                const size_t start = in.position();
                const size_t offered = in.remaining();
                size_t unread = offered;
                apr_size_t spareBytes = chunkBytes;
                {
                    synchronized sync(mutex);
                    result = apr_xlate_conv_buffer((apr_xlate_t*) convset,
                            in.data() + start,
                            &unread,
                            (char*) chunk,
                            &spareBytes);
                }
                // Append what was produced, then skip the bytes that were consumed.
                out.append(chunk, (chunkBytes - spareBytes) / sizeof(logchar));
                in.position(start + (offered - unread));
            }

            return result;
        }

    private:
        // Copying is disabled: declared but not defined.
        APRCharsetDecoder(const APRCharsetDecoder&);
        APRCharsetDecoder& operator=(const APRCharsetDecoder&);
        log4cxx::helpers::Pool pool;
        Mutex mutex;
        apr_xlate_t* convset;
};
#endif
#if LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_MBSRTOWCS
/**
 * Converts from the default multi-byte string to LogString using the
 * C library's restartable multi-byte conversion. Characters are decoded
 * one at a time with mbrtowc (the per-character counterpart of
 * mbsrtowcs) so that the number of bytes consumed is always known and
 * the conversion never reads beyond the bytes remaining in the buffer.
 */
class MbstowcsCharsetDecoder : public CharsetDecoder
{
    public:
        MbstowcsCharsetDecoder()
        {
        }

        virtual ~MbstowcsCharsetDecoder()
        {
        }

    private:
        /**
         * Decodes the unread bytes of a buffer and appends the resulting
         * wide characters.
         *
         * The conversion state is reset at the start of every call.
         * Embedded NUL bytes are decoded as a NUL character and consume
         * one byte. On an invalid or truncated multi-byte sequence the
         * characters decoded so far are appended, the buffer position is
         * left at the first byte of the offending sequence and APR_BADARG
         * is returned.
         *
         * @param in source bytes; its position is advanced past every
         *           byte that was successfully decoded.
         * @param out string that receives the decoded characters.
         * @return APR_SUCCESS when all remaining bytes were decoded,
         *         APR_BADARG otherwise.
         */
        virtual log4cxx_status_t decode(ByteBuffer& in,
            LogString& out)
        {
            log4cxx_status_t stat = APR_SUCCESS;
            enum { BUFSIZE = 256 };
            wchar_t buf[BUFSIZE];
            mbstate_t mbstate;
            memset(&mbstate, 0, sizeof(mbstate));

            while (stat == APR_SUCCESS && in.remaining() > 0)
            {
                const char* src = in.current();
                const size_t available = in.remaining();
                size_t consumed = 0;   // bytes taken from src in this batch
                size_t produced = 0;   // wide characters stored in buf

                while (consumed < available && produced < BUFSIZE)
                {
                    // The byte bound keeps mbrtowc inside the buffer even
                    // when the input is not NUL-terminated.
                    const size_t n = mbrtowc(buf + produced,
                            src + consumed,
                            available - consumed,
                            &mbstate);

                    if (n == (size_t) -1 || n == (size_t) -2)
                    {
                        // -1: invalid sequence, -2: sequence cut off by the
                        // end of the input. Either way stop before it.
                        stat = APR_BADARG;
                        break;
                    }

                    // A return of 0 means a NUL character was decoded;
                    // it still occupies one input byte.
                    consumed += (n == 0) ? 1 : n;
                    ++produced;
                }

                // Length-based append keeps embedded NUL characters.
                out.append(buf, produced);
                in.position(in.position() + consumed);
            }

            return stat;
        }

    private:
        // Copying is disabled: declared but not defined.
        MbstowcsCharsetDecoder(const MbstowcsCharsetDecoder&);
        MbstowcsCharsetDecoder& operator=(const MbstowcsCharsetDecoder&);
};
#endif
/**
 * Decoder used when the external and internal charsets
 * are the same: the bytes are reinterpreted as logchars and
 * copied without any conversion.
 */
class TrivialCharsetDecoder : public CharsetDecoder
{
    public:
        TrivialCharsetDecoder()
        {
        }

        virtual ~TrivialCharsetDecoder()
        {
        }

        /**
         * Appends the unread bytes of the buffer to the output as logchars.
         *
         * @param in source bytes; its position is moved forward by the
         *           number of bytes that were remaining.
         * @param out string that receives remaining / sizeof(logchar)
         *            characters.
         * @return always APR_SUCCESS.
         */
        virtual log4cxx_status_t decode(ByteBuffer& in,
            LogString& out)
        {
            const size_t pendingBytes = in.remaining();

            if (pendingBytes == 0)
            {
                return APR_SUCCESS;
            }

            const size_t start = in.position();
            const logchar* first = (const logchar*) (in.data() + start);
            out.append(first, pendingBytes / sizeof(logchar));
            in.position(start + pendingBytes);
            return APR_SUCCESS;
        }

    private:
        // Copying is disabled: declared but not defined.
        TrivialCharsetDecoder(const TrivialCharsetDecoder&);
        TrivialCharsetDecoder& operator=(const TrivialCharsetDecoder&);
};
#if LOG4CXX_LOGCHAR_IS_UTF8
// LogString already holds UTF-8, so the pass-through decoder is used.
typedef TrivialCharsetDecoder UTF8CharsetDecoder;
#else
/**
 * Converts from UTF-8 to LogString by decoding each code point with
 * Transcoder::decode and re-encoding it with Transcoder::encode.
 */
class UTF8CharsetDecoder : public CharsetDecoder
{
    public:
        UTF8CharsetDecoder()
        {
        }

        virtual ~UTF8CharsetDecoder()
        {
        }

    private:
        /**
         * Decodes the unread bytes of the buffer as UTF-8.
         *
         * @param in source bytes. On success its position is set to its
         *           limit; on failure it is advanced by the iterator offset
         *           at which Transcoder::decode returned 0xFFFF.
         * @param out string that receives the decoded characters.
         * @return APR_SUCCESS, or APR_BADARG when Transcoder::decode
         *         returns 0xFFFF.
         */
        virtual log4cxx_status_t decode(ByteBuffer& in,
            LogString& out)
        {
            if (in.remaining() == 0)
            {
                return APR_SUCCESS;
            }

            // Work on a copy so the transcoder can walk it with iterators.
            std::string bytes(in.current(), in.remaining());
            std::string::const_iterator cursor = bytes.begin();

            while (cursor != bytes.end())
            {
                const unsigned int codePoint = Transcoder::decode(bytes, cursor);

                if (codePoint == 0xFFFF)
                {
                    const size_t consumed = cursor - bytes.begin();
                    in.position(in.position() + consumed);
                    return APR_BADARG;
                }

                Transcoder::encode(codePoint, out);
            }

            in.position(in.limit());
            return APR_SUCCESS;
        }

    private:
        // Copying is disabled: declared but not defined.
        UTF8CharsetDecoder(const UTF8CharsetDecoder&);
        UTF8CharsetDecoder& operator=(const UTF8CharsetDecoder&);
};
#endif
/**
 * Converts from ISO-8859-1 to LogString. Every input byte is taken
 * as an unsigned value and handed to Transcoder::encode.
 */
class ISOLatinCharsetDecoder : public CharsetDecoder
{
    public:
        ISOLatinCharsetDecoder()
        {
        }

        virtual ~ISOLatinCharsetDecoder()
        {
        }

    private:
        /**
         * Decodes all unread bytes of the buffer.
         *
         * @param in source bytes; its position is set to its limit when
         *           there was anything to decode.
         * @param out string that receives the encoded characters.
         * @return always APR_SUCCESS.
         */
        virtual log4cxx_status_t decode(ByteBuffer& in,
            LogString& out)
        {
            const size_t pending = in.remaining();

            if (pending > 0)
            {
                const unsigned char* bytes = (const unsigned char*) in.current();

                for (size_t i = 0; i < pending; ++i)
                {
                    const unsigned int value = bytes[i];
                    Transcoder::encode(value, out);
                }

                in.position(in.limit());
            }

            return APR_SUCCESS;
        }

    private:
        // Copying is disabled: declared but not defined.
        ISOLatinCharsetDecoder(const ISOLatinCharsetDecoder&);
        ISOLatinCharsetDecoder& operator=(const ISOLatinCharsetDecoder&);
};
/**
 * Converts from US-ASCII to LogString. Bytes below 0x80 are passed to
 * Transcoder::encode; the first byte at or above 0x80 stops decoding.
 */
class USASCIICharsetDecoder : public CharsetDecoder
{
    public:
        USASCIICharsetDecoder()
        {
        }

        virtual ~USASCIICharsetDecoder()
        {
        }

    private:
        /**
         * Decodes unread bytes until the input ends or a byte outside the
         * 7-bit range is met.
         *
         * @param in source bytes; its position is left just after the last
         *           byte that was decoded (i.e. at the offending byte on
         *           failure).
         * @param out string that receives the decoded characters.
         * @return APR_SUCCESS, or APR_BADARG if a byte >= 0x80 was found.
         */
        virtual log4cxx_status_t decode(ByteBuffer& in,
            LogString& out)
        {
            if (in.remaining() == 0)
            {
                return APR_SUCCESS;
            }

            log4cxx_status_t result = APR_SUCCESS;
            const unsigned char* cursor = (const unsigned char*) in.current();
            const unsigned char* const end = cursor + in.remaining();

            for (; cursor < end; ++cursor)
            {
                const unsigned char ch = *cursor;

                if (ch >= 0x80)
                {
                    // Leave the cursor on the non-ASCII byte.
                    result = APR_BADARG;
                    break;
                }

                Transcoder::encode(ch, out);
            }

            in.position(cursor - (const unsigned char*) in.data());
            return result;
        }

    private:
        // Copying is disabled: declared but not defined.
        USASCIICharsetDecoder(const USASCIICharsetDecoder&);
        USASCIICharsetDecoder& operator=(const USASCIICharsetDecoder&);
};
/**
 * Charset decoder that uses an embedded CharsetDecoder consistent
 * with current locale settings. The embedded decoder is (re)selected
 * whenever apr_os_locale_encoding reports a name different from the
 * one last seen.
 */
class LocaleCharsetDecoder : public CharsetDecoder
{
    public:
        LocaleCharsetDecoder() : pool(), mutex(pool), decoder(), encoding()
        {
        }

        virtual ~LocaleCharsetDecoder()
        {
        }

        /**
         * Decodes the unread bytes of the buffer.
         *
         * Unless LOG4CXX_CHARSET_EBCDIC is set, leading bytes below 0x80
         * are appended directly. If bytes remain after that, the current
         * locale encoding is looked up and the matching embedded decoder
         * handles the rest. When no encoding name is available, or
         * getDecoder rejects the name with IllegalArgumentException, a
         * USASCIICharsetDecoder is used.
         *
         * @param in source bytes; its position is advanced.
         * @param out string that receives the decoded characters.
         * @return APR_SUCCESS, or the status of the embedded decoder.
         */
        virtual log4cxx_status_t decode(ByteBuffer& in,
            LogString& out)
        {
            const char* p = in.current();
            size_t i = in.position();
#if !LOG4CXX_CHARSET_EBCDIC

            // A negative (signed) char converts to a large unsigned value,
            // so this comparison also stops on bytes >= 0x80.
            for (; i < in.limit() && ((unsigned int) *p) < 0x80; i++, p++)
            {
                out.append(1, *p);
            }

            in.position(i);
#endif

            if (i < in.limit())
            {
                Pool subpool;
                const char* enc = apr_os_locale_encoding(subpool.getAPRPool());
                // Snapshot taken under the lock: the member may be replaced
                // by another thread as soon as the lock is released, so the
                // decode call below must not read the member directly.
                CharsetDecoderPtr active;
                {
                    synchronized sync(mutex);

                    if (enc == 0)
                    {
                        if (decoder == 0)
                        {
                            encoding = "C";
                            decoder = new USASCIICharsetDecoder();
                        }
                    }
                    else if (encoding != enc)
                    {
                        encoding = enc;

                        try
                        {
                            LogString e;
                            Transcoder::decode(encoding, e);
                            decoder = getDecoder(e);
                        }
                        catch (const IllegalArgumentException&)
                        {
                            decoder = new USASCIICharsetDecoder();
                        }
                    }

                    active = decoder;
                }
                return active->decode(in, out);
            }

            return APR_SUCCESS;
        }

    private:
        Pool pool;
        Mutex mutex;                 // guards decoder and encoding
        CharsetDecoderPtr decoder;   // decoder matching the last seen encoding
        std::string encoding;        // last encoding name that was applied
};
} // namespace helpers
} //namespace log4cxx
/**
 * Base-class constructor; performs no work of its own.
 */
CharsetDecoder::CharsetDecoder()
{
}
/**
 * Base-class destructor; performs no work of its own.
 */
CharsetDecoder::~CharsetDecoder()
{
}
/**
 * Allocates the decoder for the default external charset, chosen at
 * compile time from the LOG4CXX_CHARSET_* / LOG4CXX_LOGCHAR_* settings.
 * The first matching branch wins; LocaleCharsetDecoder is the fallback.
 * @return newly allocated decoder.
 */
CharsetDecoder* CharsetDecoder::createDefaultDecoder()
{
    CharsetDecoder* created =
#if LOG4CXX_CHARSET_UTF8
        new UTF8CharsetDecoder();
#elif LOG4CXX_CHARSET_ISO88591 || defined(_WIN32_WCE)
        new ISOLatinCharsetDecoder();
#elif LOG4CXX_CHARSET_USASCII
        new USASCIICharsetDecoder();
#elif LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_MBSRTOWCS
        new MbstowcsCharsetDecoder();
#else
        new LocaleCharsetDecoder();
#endif
    return created;
}
/**
 * Returns the decoder for the default external charset.
 * A single function-local static instance is handed out while it is
 * non-null.
 * @return the shared decoder, or a fresh one if the shared pointer is null.
 */
CharsetDecoderPtr CharsetDecoder::getDefaultDecoder()
{
    static CharsetDecoderPtr shared(createDefaultDecoder());

    if (!(shared == 0))
    {
        return shared;
    }

    //
    //  Reached when invoked after static variable destruction
    //  (logging called from the destructor of a static object):
    //  build a new decoder instead of using the dead one.
    //
    return createDefaultDecoder();
}
/**
 * Returns a decoder for UTF-8 input.
 * A single function-local static instance is handed out while it is
 * non-null.
 * @return the shared decoder, or a fresh one if the shared pointer is null.
 */
CharsetDecoderPtr CharsetDecoder::getUTF8Decoder()
{
    static CharsetDecoderPtr shared(new UTF8CharsetDecoder());

    if (!(shared == 0))
    {
        return shared;
    }

    //
    //  Reached when invoked after static variable destruction
    //  (logging called from the destructor of a static object):
    //  build a new decoder instead of using the dead one.
    //
    return new UTF8CharsetDecoder();
}
/**
 * Creates a decoder for ISO-8859-1 input.
 * @return a new ISOLatinCharsetDecoder on every call.
 */
CharsetDecoderPtr CharsetDecoder::getISOLatinDecoder()
{
    CharsetDecoderPtr latin(new ISOLatinCharsetDecoder());
    return latin;
}
/**
 * Creates a decoder for the named charset.
 *
 * UTF-8, US-ASCII and ISO-8859-1 (under the aliases listed below) are
 * served by the built-in decoders. Names are compared through
 * StringHelper::equalsIgnoreCase, which is given an upper-case and a
 * lower-case spelling of each alias; "646" is compared exactly.
 * Any other name is passed to APRCharsetDecoder when APR_HAS_XLATE is
 * set.
 *
 * @param charset name of the source encoding.
 * @return a new decoder for that encoding.
 * @throws IllegalArgumentException if the name is not a built-in alias
 *         and either APR_HAS_XLATE is not set or APRCharsetDecoder
 *         cannot open the conversion.
 */
CharsetDecoderPtr CharsetDecoder::getDecoder(const LogString& charset)
{
    if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("UTF-8"), LOG4CXX_STR("utf-8")) ||
        StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("UTF8"), LOG4CXX_STR("utf8")))
    {
        return new UTF8CharsetDecoder();
    }
    else if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("C"), LOG4CXX_STR("c")) ||
        charset == LOG4CXX_STR("646") ||
        StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("US-ASCII"), LOG4CXX_STR("us-ascii")) ||
        // The lower-case spelling must be entirely lower case, like every
        // other alias here, so that "iso646-us" is accepted as well.
        StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ISO646-US"), LOG4CXX_STR("iso646-us")) ||
        StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ANSI_X3.4-1968"), LOG4CXX_STR("ansi_x3.4-1968")))
    {
        return new USASCIICharsetDecoder();
    }
    else if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ISO-8859-1"), LOG4CXX_STR("iso-8859-1")) ||
        StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ISO-LATIN-1"), LOG4CXX_STR("iso-latin-1")))
    {
        return new ISOLatinCharsetDecoder();
    }

    // Not a built-in charset: defer to apr_xlate when it is available.
#if APR_HAS_XLATE
    return new APRCharsetDecoder(charset);
#else
    throw IllegalArgumentException(charset);
#endif
}