| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| #include <log4cxx/logstring.h> |
| #include <log4cxx/helpers/charsetencoder.h> |
| #include <log4cxx/helpers/bytebuffer.h> |
| #include <log4cxx/helpers/exception.h> |
| #include <apr_xlate.h> |
| #include <log4cxx/helpers/stringhelper.h> |
| #include <log4cxx/helpers/transcoder.h> |
| |
| #if !defined(LOG4CXX) |
| #define LOG4CXX 1 |
| #endif |
| |
| #include <log4cxx/private/log4cxx_private.h> |
| #include <apr_portable.h> |
| #include <log4cxx/helpers/mutex.h> |
| #include <log4cxx/helpers/synchronized.h> |
| |
| #ifdef LOG4CXX_HAS_WCSTOMBS |
| #include <stdlib.h> |
| #endif |
| |
| using namespace log4cxx; |
| using namespace log4cxx::helpers; |
| |
| IMPLEMENT_LOG4CXX_OBJECT(CharsetEncoder) |
| |
| namespace log4cxx |
| { |
| |
| namespace helpers |
| { |
| |
| #if APR_HAS_XLATE |
| /** |
| * A character encoder implemented using apr_xlate. |
| */ |
| class APRCharsetEncoder : public CharsetEncoder |
| { |
| public: |
| APRCharsetEncoder(const LogString& topage) : pool(), mutex(pool) |
| { |
| #if LOG4CXX_LOGCHAR_IS_WCHAR |
| const char* frompage = "WCHAR_T"; |
| #endif |
| #if LOG4CXX_LOGCHAR_IS_UTF8 |
| const char* frompage = "UTF-8"; |
| #endif |
| #if LOG4CXX_LOGCHAR_IS_UNICHAR |
| const char* frompage = "UTF-16"; |
| #endif |
| std::string tpage(Transcoder::encodeCharsetName(topage)); |
| apr_status_t stat = apr_xlate_open(&convset, |
| tpage.c_str(), |
| frompage, |
| pool.getAPRPool()); |
| |
| if (stat != APR_SUCCESS) |
| { |
| throw IllegalArgumentException(topage); |
| } |
| } |
| |
| virtual ~APRCharsetEncoder() |
| { |
| } |
| |
| virtual log4cxx_status_t encode(const LogString& in, |
| LogString::const_iterator& iter, |
| ByteBuffer& out) |
| { |
| apr_status_t stat; |
| size_t outbytes_left = out.remaining(); |
| size_t initial_outbytes_left = outbytes_left; |
| size_t position = out.position(); |
| |
| if (iter == in.end()) |
| { |
| synchronized sync(mutex); |
| stat = apr_xlate_conv_buffer(convset, NULL, NULL, |
| out.data() + position, &outbytes_left); |
| } |
| else |
| { |
| LogString::size_type inOffset = (iter - in.begin()); |
| apr_size_t inbytes_left = |
| (in.size() - inOffset) * sizeof(LogString::value_type); |
| apr_size_t initial_inbytes_left = inbytes_left; |
| { |
| synchronized sync(mutex); |
| stat = apr_xlate_conv_buffer(convset, |
| (const char*) (in.data() + inOffset), |
| &inbytes_left, |
| out.data() + position, |
| &outbytes_left); |
| } |
| iter += ((initial_inbytes_left - inbytes_left) / sizeof(LogString::value_type)); |
| } |
| |
| out.position(out.position() + (initial_outbytes_left - outbytes_left)); |
| return stat; |
| } |
| |
| private: |
| APRCharsetEncoder(const APRCharsetEncoder&); |
| APRCharsetEncoder& operator=(const APRCharsetEncoder&); |
| Pool pool; |
| Mutex mutex; |
| apr_xlate_t* convset; |
| }; |
| #endif |
| |
| #if LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_WCSTOMBS |
| /** |
| * A character encoder implemented using wcstombs. |
| */ |
| class WcstombsCharsetEncoder : public CharsetEncoder |
| { |
| public: |
| WcstombsCharsetEncoder() |
| { |
| } |
| |
| /** |
| * Converts a wchar_t to the default external multibyte encoding. |
| */ |
| log4cxx_status_t encode(const LogString& in, |
| LogString::const_iterator& iter, |
| ByteBuffer& out) |
| { |
| log4cxx_status_t stat = APR_SUCCESS; |
| |
| if (iter != in.end()) |
| { |
| size_t outbytes_left = out.remaining(); |
| size_t position = out.position(); |
| std::wstring::size_type inOffset = (iter - in.begin()); |
| enum { BUFSIZE = 256 }; |
| wchar_t buf[BUFSIZE]; |
| size_t chunkSize = BUFSIZE - 1; |
| |
| if (chunkSize * MB_LEN_MAX > outbytes_left) |
| { |
| chunkSize = outbytes_left / MB_LEN_MAX; |
| } |
| |
| if (chunkSize > in.length() - inOffset) |
| { |
| chunkSize = in.length() - inOffset; |
| } |
| |
| memset(buf, 0, BUFSIZE * sizeof(wchar_t)); |
| memcpy(buf, |
| in.data() + inOffset, |
| chunkSize * sizeof(wchar_t)); |
| size_t converted = wcstombs(out.data() + position, buf, outbytes_left); |
| |
| if (converted == (size_t) -1) |
| { |
| stat = APR_BADARG; |
| |
| // |
| // if unconvertable character was encountered |
| // repeatedly halve source to get fragment that |
| // can be converted |
| for (chunkSize /= 2; |
| chunkSize > 0; |
| chunkSize /= 2) |
| { |
| buf[chunkSize] = 0; |
| converted = wcstombs(out.data() + position, buf, outbytes_left); |
| |
| if (converted != (size_t) -1) |
| { |
| iter += chunkSize; |
| out.position(out.position() + converted); |
| break; |
| } |
| } |
| } |
| else |
| { |
| iter += chunkSize; |
| out.position(out.position() + converted); |
| } |
| } |
| |
| return stat; |
| } |
| |
| |
| |
| private: |
| WcstombsCharsetEncoder(const WcstombsCharsetEncoder&); |
| WcstombsCharsetEncoder& operator=(const WcstombsCharsetEncoder&); |
| }; |
| #endif |
| |
| |
| /** |
| * Encodes a LogString to US-ASCII. |
| */ |
| class USASCIICharsetEncoder : public CharsetEncoder |
| { |
| public: |
| USASCIICharsetEncoder() |
| { |
| } |
| |
| virtual log4cxx_status_t encode(const LogString& in, |
| LogString::const_iterator& iter, |
| ByteBuffer& out) |
| { |
| log4cxx_status_t stat = APR_SUCCESS; |
| |
| if (iter != in.end()) |
| { |
| while (out.remaining() > 0 && iter != in.end()) |
| { |
| LogString::const_iterator prev(iter); |
| unsigned int sv = Transcoder::decode(in, iter); |
| |
| if (sv <= 0x7F) |
| { |
| out.put((char) sv); |
| } |
| else |
| { |
| iter = prev; |
| stat = APR_BADARG; |
| break; |
| } |
| } |
| } |
| |
| return stat; |
| } |
| |
| private: |
| USASCIICharsetEncoder(const USASCIICharsetEncoder&); |
| USASCIICharsetEncoder& operator=(const USASCIICharsetEncoder&); |
| }; |
| |
| /** |
| * Converts a LogString to ISO-8859-1. |
| */ |
| class ISOLatinCharsetEncoder : public CharsetEncoder |
| { |
| public: |
| ISOLatinCharsetEncoder() |
| { |
| } |
| |
| virtual log4cxx_status_t encode(const LogString& in, |
| LogString::const_iterator& iter, |
| ByteBuffer& out) |
| { |
| log4cxx_status_t stat = APR_SUCCESS; |
| |
| if (iter != in.end()) |
| { |
| while (out.remaining() > 0 && iter != in.end()) |
| { |
| LogString::const_iterator prev(iter); |
| unsigned int sv = Transcoder::decode(in, iter); |
| |
| if (sv <= 0xFF) |
| { |
| out.put((char) sv); |
| } |
| else |
| { |
| iter = prev; |
| stat = APR_BADARG; |
| break; |
| } |
| } |
| } |
| |
| return stat; |
| } |
| |
| private: |
| ISOLatinCharsetEncoder(const ISOLatinCharsetEncoder&); |
| ISOLatinCharsetEncoder& operator=(const ISOLatinCharsetEncoder&); |
| }; |
| |
| /** |
| * Encodes a LogString to a byte array when the encodings are identical. |
| */ |
| class TrivialCharsetEncoder : public CharsetEncoder |
| { |
| public: |
| TrivialCharsetEncoder() |
| { |
| } |
| |
| |
| virtual log4cxx_status_t encode(const LogString& in, |
| LogString::const_iterator& iter, |
| ByteBuffer& out) |
| { |
| if (iter != in.end()) |
| { |
| size_t requested = in.length() - (iter - in.begin()); |
| |
| if (requested > out.remaining() / sizeof(logchar)) |
| { |
| requested = out.remaining() / sizeof(logchar); |
| } |
| |
| memcpy(out.current(), |
| (const char*) in.data() + (iter - in.begin()), |
| requested * sizeof(logchar)); |
| iter += requested; |
| out.position(out.position() + requested * sizeof(logchar)); |
| } |
| |
| return APR_SUCCESS; |
| } |
| |
| private: |
| TrivialCharsetEncoder(const TrivialCharsetEncoder&); |
| TrivialCharsetEncoder& operator=(const TrivialCharsetEncoder&); |
| }; |
| |
| #if LOG4CXX_LOGCHAR_IS_UTF8 |
| typedef TrivialCharsetEncoder UTF8CharsetEncoder; |
| #else |
| /** |
| * Converts a LogString to UTF-8. |
| */ |
| class UTF8CharsetEncoder : public CharsetEncoder |
| { |
| public: |
| UTF8CharsetEncoder() |
| { |
| } |
| |
| virtual log4cxx_status_t encode(const LogString& in, |
| LogString::const_iterator& iter, |
| ByteBuffer& out) |
| { |
| while (iter != in.end() && out.remaining() >= 8) |
| { |
| unsigned int sv = Transcoder::decode(in, iter); |
| |
| if (sv == 0xFFFF) |
| { |
| return APR_BADARG; |
| } |
| |
| Transcoder::encodeUTF8(sv, out); |
| } |
| |
| return APR_SUCCESS; |
| } |
| |
| private: |
| UTF8CharsetEncoder(const UTF8CharsetEncoder&); |
| UTF8CharsetEncoder& operator=(const UTF8CharsetEncoder&); |
| }; |
| #endif |
| |
| /** |
| * Encodes a LogString to UTF16-BE. |
| */ |
| class UTF16BECharsetEncoder : public CharsetEncoder |
| { |
| public: |
| UTF16BECharsetEncoder() |
| { |
| } |
| |
| virtual log4cxx_status_t encode(const LogString& in, |
| LogString::const_iterator& iter, |
| ByteBuffer& out) |
| { |
| while (iter != in.end() && out.remaining() >= 4) |
| { |
| unsigned int sv = Transcoder::decode(in, iter); |
| |
| if (sv == 0xFFFF) |
| { |
| return APR_BADARG; |
| } |
| |
| Transcoder::encodeUTF16BE(sv, out); |
| } |
| |
| return APR_SUCCESS; |
| } |
| |
| private: |
| UTF16BECharsetEncoder(const UTF16BECharsetEncoder&); |
| UTF16BECharsetEncoder& operator=(const UTF16BECharsetEncoder&); |
| }; |
| |
| /** |
| * Encodes a LogString to UTF16-LE. |
| */ |
| class UTF16LECharsetEncoder : public CharsetEncoder |
| { |
| public: |
| UTF16LECharsetEncoder() |
| { |
| } |
| |
| |
| virtual log4cxx_status_t encode(const LogString& in, |
| LogString::const_iterator& iter, |
| ByteBuffer& out) |
| { |
| while (iter != in.end() && out.remaining() >= 4) |
| { |
| unsigned int sv = Transcoder::decode(in, iter); |
| |
| if (sv == 0xFFFF) |
| { |
| return APR_BADARG; |
| } |
| |
| Transcoder::encodeUTF16LE(sv, out); |
| } |
| |
| return APR_SUCCESS; |
| } |
| private: |
| UTF16LECharsetEncoder(const UTF16LECharsetEncoder&); |
| UTF16LECharsetEncoder& operator=(const UTF16LECharsetEncoder&); |
| }; |
| |
| /** |
| * Charset encoder that uses an embedded CharsetEncoder consistent |
| * with current locale settings. |
| */ |
| class LocaleCharsetEncoder : public CharsetEncoder |
| { |
| public: |
| LocaleCharsetEncoder() : pool(), mutex(pool), encoder(), encoding() |
| { |
| } |
| virtual ~LocaleCharsetEncoder() |
| { |
| } |
| virtual log4cxx_status_t encode(const LogString& in, |
| LogString::const_iterator& iter, |
| ByteBuffer& out) |
| { |
| #if !LOG4CXX_CHARSET_EBCDIC |
| char* current = out.current(); |
| size_t remain = out.remaining(); |
| |
| for (; |
| iter != in.end() && ((unsigned int) *iter) < 0x80 && remain > 0; |
| iter++, remain--, current++) |
| { |
| *current = *iter; |
| } |
| |
| out.position(current - out.data()); |
| #endif |
| |
| if (iter != in.end() && out.remaining() > 0) |
| { |
| Pool subpool; |
| const char* enc = apr_os_locale_encoding(subpool.getAPRPool()); |
| { |
| synchronized sync(mutex); |
| |
| if (enc == 0) |
| { |
| if (encoder == 0) |
| { |
| encoding = "C"; |
| encoder = new USASCIICharsetEncoder(); |
| } |
| } |
| else if (encoding != enc) |
| { |
| encoding = enc; |
| LogString ename; |
| Transcoder::decode(encoding, ename); |
| |
| try |
| { |
| encoder = CharsetEncoder::getEncoder(ename); |
| } |
| catch (IllegalArgumentException&) |
| { |
| encoder = new USASCIICharsetEncoder(); |
| } |
| } |
| } |
| return encoder->encode(in, iter, out); |
| } |
| |
| return APR_SUCCESS; |
| } |
| |
| private: |
| LocaleCharsetEncoder(const LocaleCharsetEncoder&); |
| LocaleCharsetEncoder& operator=(const LocaleCharsetEncoder&); |
| Pool pool; |
| Mutex mutex; |
| CharsetEncoderPtr encoder; |
| std::string encoding; |
| }; |
| |
| |
| } // namespace helpers |
| |
| } //namespace log4cxx |
| |
| |
| |
| CharsetEncoder::CharsetEncoder() |
| { |
| } |
| |
| CharsetEncoder::~CharsetEncoder() |
| { |
| } |
| |
| CharsetEncoderPtr CharsetEncoder::getDefaultEncoder() |
| { |
| static CharsetEncoderPtr encoder(createDefaultEncoder()); |
| |
| // |
| // if invoked after static variable destruction |
| // (if logging is called in the destructor of a static object) |
| // then create a new decoder. |
| // |
| if (encoder == 0) |
| { |
| return createDefaultEncoder(); |
| } |
| |
| return encoder; |
| } |
| |
| CharsetEncoder* CharsetEncoder::createDefaultEncoder() |
| { |
| #if LOG4CXX_CHARSET_UTF8 |
| return new UTF8CharsetEncoder(); |
| #elif LOG4CXX_CHARSET_ISO88591 |
| return new ISOLatinCharsetEncoder(); |
| #elif LOG4CXX_CHARSET_USASCII |
| return new USASCIICharsetEncoder(); |
| #elif LOG4CXX_LOGCHAR_IS_WCHAR && LOG4CXX_HAS_WCSTOMBS |
| return new WcstombsCharsetEncoder(); |
| #else |
| return new LocaleCharsetEncoder(); |
| #endif |
| } |
| |
| |
| CharsetEncoderPtr CharsetEncoder::getUTF8Encoder() |
| { |
| return new UTF8CharsetEncoder(); |
| } |
| |
| |
| |
| CharsetEncoderPtr CharsetEncoder::getEncoder(const LogString& charset) |
| { |
| if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("UTF-8"), LOG4CXX_STR("utf-8"))) |
| { |
| return new UTF8CharsetEncoder(); |
| } |
| else if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("C"), LOG4CXX_STR("c")) || |
| charset == LOG4CXX_STR("646") || |
| StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("US-ASCII"), LOG4CXX_STR("us-ascii")) || |
| StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ISO646-US"), LOG4CXX_STR("iso646-US")) || |
| StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ANSI_X3.4-1968"), LOG4CXX_STR("ansi_x3.4-1968"))) |
| { |
| return new USASCIICharsetEncoder(); |
| } |
| else if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ISO-8859-1"), LOG4CXX_STR("iso-8859-1")) || |
| StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("ISO-LATIN-1"), LOG4CXX_STR("iso-latin-1"))) |
| { |
| return new ISOLatinCharsetEncoder(); |
| } |
| else if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("UTF-16BE"), LOG4CXX_STR("utf-16be")) |
| || StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("UTF-16"), LOG4CXX_STR("utf-16"))) |
| { |
| return new UTF16BECharsetEncoder(); |
| } |
| else if (StringHelper::equalsIgnoreCase(charset, LOG4CXX_STR("UTF-16LE"), LOG4CXX_STR("utf-16le"))) |
| { |
| return new UTF16LECharsetEncoder(); |
| } |
| |
| #if APR_HAS_XLATE |
| return new APRCharsetEncoder(charset); |
| #else |
| throw IllegalArgumentException(charset); |
| #endif |
| } |
| |
| |
| void CharsetEncoder::reset() |
| { |
| } |
| |
| void CharsetEncoder::flush(ByteBuffer& /* out */ ) |
| { |
| } |
| |
| |
| void CharsetEncoder::encode(CharsetEncoderPtr& enc, |
| const LogString& src, |
| LogString::const_iterator& iter, |
| ByteBuffer& dst) |
| { |
| log4cxx_status_t stat = enc->encode(src, iter, dst); |
| |
| if (stat != APR_SUCCESS && iter != src.end()) |
| { |
| #if LOG4CXX_LOGCHAR_IS_WCHAR || LOG4CXX_LOGCHAR_IS_UNICHAR |
| iter++; |
| #elif LOG4CXX_LOGCHAR_IS_UTF8 |
| |
| // advance past this character and all continuation characters |
| while ((*(++iter) & 0xC0) == 0x80); |
| |
| #else |
| #error logchar is unrecognized |
| #endif |
| dst.put(Transcoder::LOSSCHAR); |
| } |
| } |