| /************************************************************************** |
| * |
| * insert_wchar.cpp |
| * |
| * Example program demonstrating an implementation of an inserter |
| * operator overloaded for arrays of wchar_t that performs codeset |
| * conversion from wchar_t to mutlibyte characters. |
| * |
| * $Id$ |
| * |
| *************************************************************************** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed |
| * with this work for additional information regarding copyright |
| * ownership. The ASF licenses this file to you under the Apache |
| * License, Version 2.0 (the "License"); you may not use this file |
| * except in compliance with the License. You may obtain a copy of |
| * the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
| * implied. See the License for the specific language governing |
| * permissions and limitations under the License. |
| * |
| **************************************************************************/ |
| |
| #include <cassert> // for assert() |
| #include <cwchar> // for mbstate_t, size_t |
| #include <ios> // for hex |
| #include <iostream> // for cout |
| #include <locale> // for codecvt, isalnum(), locale |
| #include <ostream> // for basic_ostream |
| #include <sstream> // for ostringstream |
| |
| |
| // inserts a wide character string into a stream buffer performing |
| // codeset conversion if necessary |
| template <class charT, class Traits> |
| void |
| streambuf_insert (std::basic_ostream<charT, Traits> &strm, |
| const wchar_t *s) |
| { |
| typedef typename Traits::state_type StateT; |
| typedef std::codecvt<wchar_t, charT, StateT> Codecvt; |
| |
| const Codecvt &cvt = std::use_facet<Codecvt>(strm.getloc ()); |
| |
| const std::size_t slen = std::char_traits<wchar_t>::length (s); |
| |
| // perform codeset conversion in chunks to avoid dynamic |
| // memory allocation |
| |
| const std::size_t xbufsize = 32; |
| |
| charT xbuf [xbufsize]; |
| charT* xbuf_end = xbuf + xbufsize; |
| charT* to_next = 0; |
| const wchar_t* from_next = 0; |
| const wchar_t* const end = s + slen; |
| |
| StateT state = StateT (); |
| |
| for (const wchar_t* base = s; from_next != end; base = from_next) { |
| |
| const std::codecvt_base::result res = |
| cvt.out (state, base, end, from_next, |
| xbuf, xbuf_end, to_next); |
| |
| std::streamsize nbytes = to_next - xbuf; |
| |
| switch (res) { |
| case Codecvt::error: |
| // write out the sequence successfully converted up |
| // to the point of the error in the internal sequence |
| // and fail |
| strm.rdbuf ()->sputn (xbuf, nbytes); |
| strm.setstate (strm.badbit); |
| |
| case Codecvt::noconv: |
| // write the entire sequence |
| if (nbytes != strm.rdbuf ()->sputn (xbuf, nbytes)) { |
| strm.setstate (strm.badbit); |
| return; |
| } |
| |
| from_next = end; // effectively break |
| break; |
| |
| default: |
| assert (cvt.ok == res || cvt.partial == res); |
| |
| // partial conversion will result if there isn't enough |
| // space in the conversion buffer to hold the converted |
| // sequence, but we're O.K. since we'll be passing any |
| // remaining unconverted characters (starting at |
| // from_next) in the next iteration |
| |
| nbytes = to_next - xbuf; |
| |
| if (nbytes != strm.rdbuf ()->sputn (xbuf, nbytes)) { |
| strm.setstate (strm.badbit); |
| return; |
| } |
| } |
| } |
| } |
| |
| |
| // stream insertion operator overloaded for arrays of wchar_t characters |
| template <class charT, class Traits> |
| std::basic_ostream<charT, Traits>& |
| operator<< (std::basic_ostream<charT, Traits> &strm, |
| const wchar_t *s) |
| { |
| const typename std::basic_ostream<charT, Traits>::sentry opfx (strm); |
| |
| if (opfx) { |
| |
| try { |
| // try to insert character array into stream buffer |
| streambuf_insert (strm, s); |
| } |
| catch (...) { |
| bool threw; |
| try { |
| // set badbit on exception without throwing ios::failure |
| strm.setstate (strm.badbit); |
| threw = false; |
| } |
| catch (std::ios_base::failure&) { |
| // make a note of the exception thrown from setstate()... |
| threw = true; |
| } |
| if (threw) { |
| // ...and rethrow the original exception |
| throw; |
| } |
| } |
| } |
| |
| return strm; |
| } |
| |
| |
| // examples of wide character strings |
| static const wchar_t* const wcs [] = { |
| L"a", L"abc", |
| // Greek letter Alpha: |
| L"\x0391", // "\xce\x91" |
| // Greek letters Alpha Beta: |
| L"\x0391\x0392", // "\xce\x91\xce\x91\xce\x92" |
| // Greek letters Alpha Beta Gamma: |
| L"\x0391\x0392\x0393", // "\xce\x91\xce\x92\xce\x93" |
| // Tibetan digit zero: |
| L"\x0f20", // "\xe0\xbc\xa0" |
| // Tibetan digits one, zero: |
| L"\x0f21\x0f20", // "\xe0\xbc\xa1\xe0\xbc\xa0" |
| // Tibetan digits two, one, zero: |
| L"\x0f22\x0f21\x0f20" // "\xe0\xbc\xa2\xe0\xbc\xa1\xe0\xbc\xa0" |
| }; |
| |
| |
| int main () |
| { |
| typedef std::codecvt_byname<wchar_t, char, std::mbstate_t> Codecvt; |
| |
| // create a UCS/UTF-8 codecvt facet and install it in a locale |
| const std::locale utf (std::cout.getloc (), new Codecvt ("UTF-8@UCS")); |
| |
| for (std::size_t i = 0; i != sizeof wcs / sizeof *wcs; ++i) { |
| |
| std::ostringstream strm; |
| |
| // imbue the UTF-8/UCS capable locale in a stringstream |
| strm.imbue (utf); |
| |
| // insert each wide character string into the narrow stream |
| // object relying on the inserter to convert each wide string |
| // into the corresponding multibyte character string |
| strm << wcs [i]; |
| |
| // write out the wide character string in Unicode notation |
| std::cout << "UCS-2: " << std::hex; |
| |
| for (const wchar_t *pwc = wcs [i]; *pwc != L'\0'; ++pwc) |
| std::cout << "U+" << unsigned (*pwc) << ' '; |
| |
| const std::string str = strm.str (); |
| |
| std::cout << " ==> UTF-8: \""; |
| |
| typedef unsigned char UChar; |
| |
| // write out the the multibyte character sequence using |
| // ordinary aphanumeric symbols or hex escape sequences |
| // where necessary |
| for (const char *pc = str.c_str (); *pc != '\0'; ++pc) { |
| |
| // parenthesize isalnum to prevent macro expension |
| // in case the function happens to be (illegally) |
| // shadowed by a macro |
| if ((std::isalnum)(*pc, std::cout.getloc ())) |
| std::cout << *pc; |
| else |
| std::cout << "\\x" << int (UChar (*pc)); |
| } |
| |
| std::cout << "\"\n"; |
| } |
| } |