| /************************************************************************** |
| * |
| * codecvt1.cpp - Example program of codecvt facet. |
| * |
| * $Id$ |
| * |
| *************************************************************************** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed |
| * with this work for additional information regarding copyright |
| * ownership. The ASF licenses this file to you under the Apache |
| * License, Version 2.0 (the "License"); you may not use this file |
| * except in compliance with the License. You may obtain a copy of |
| * the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
| * implied. See the License for the specific language governing |
| * permissions and limitations under the License. |
| * |
| * Copyright 1994-2006 Rogue Wave Software. |
| * |
| **************************************************************************/ |
| |
#include <algorithm>   // for min()
#include <fstream>
#include <iomanip>
#include <iostream>
#include <locale>
#include <numeric>

#include <cstddef>     // for ptrdiff_t, size_t
#include <cstring>     // for memcmp(), strlen()
#include <cwchar>      // for mbstate_t
#include <examples.h>
| |
| |
// Number of elements in each scratch buffer (wide and narrow) that the
// conversion functions below use to hold converted text.
#define TMP_BUFFER_SIZE 1024
| |
// Japanese weekday names in the EUC-JP encoding.  Every day contributes two
// newline-terminated entries: the short (abbreviated) form followed by the
// long (normal) form.  The same text is also provided in the Shift_JIS and
// UTF-8 encodings.
const char wd_eucjp [] =
    "\xc6\xfc\x0a" "\xc6\xfc\xcd\xcb\xc6\xfc\x0a"    // Sunday
    "\xb7\xee\x0a" "\xb7\xee\xcd\xcb\xc6\xfc\x0a"    // Monday
    "\xb2\xd0\x0a" "\xb2\xd0\xcd\xcb\xc6\xfc\x0a"    // Tuesday
    "\xbf\xe5\x0a" "\xbf\xe5\xcd\xcb\xc6\xfc\x0a"    // Wednesday
    "\xcc\xda\x0a" "\xcc\xda\xcd\xcb\xc6\xfc\x0a"    // Thursday
    "\xb6\xe2\x0a" "\xb6\xe2\xcd\xcb\xc6\xfc\x0a"    // Friday
    "\xc5\xda\x0a" "\xc5\xda\xcd\xcb\xc6\xfc\x0a";   // Saturday
| |
// Japanese weekday names in the Shift_JIS encoding.  Every day contributes
// two newline-terminated entries: the short (abbreviated) form followed by
// the long (normal) form.
const char wd_sjis [] =
    "\x93\xfa\x0a" "\x93\xfa\x97\x6a\x93\xfa\x0a"    // Sunday
    "\x8c\x8e\x0a" "\x8c\x8e\x97\x6a\x93\xfa\x0a"    // Monday
    "\x89\xce\x0a" "\x89\xce\x97\x6a\x93\xfa\x0a"    // Tuesday
    "\x90\x85\x0a" "\x90\x85\x97\x6a\x93\xfa\x0a"    // Wednesday
    "\x96\xd8\x0a" "\x96\xd8\x97\x6a\x93\xfa\x0a"    // Thursday
    "\x8b\xe0\x0a" "\x8b\xe0\x97\x6a\x93\xfa\x0a"    // Friday
    "\x93\x79\x0a" "\x93\x79\x97\x6a\x93\xfa\x0a";   // Saturday
| |
// Japanese weekday names in the UTF-8 encoding.  Every day contributes two
// newline-terminated entries: the short (abbreviated) form followed by the
// long (normal) form.
const char wd_utf8 [] =
    "\xe6\x97\xa5\x0a" "\xe6\x97\xa5\xe6\x9b\x9c\xe6\x97\xa5\x0a"    // Sunday
    "\xe6\x9c\x88\x0a" "\xe6\x9c\x88\xe6\x9b\x9c\xe6\x97\xa5\x0a"    // Monday
    "\xe7\x81\xab\x0a" "\xe7\x81\xab\xe6\x9b\x9c\xe6\x97\xa5\x0a"    // Tuesday
    "\xe6\xb0\xb4\x0a" "\xe6\xb0\xb4\xe6\x9b\x9c\xe6\x97\xa5\x0a"    // Wednesday
    "\xe6\x9c\xa8\x0a" "\xe6\x9c\xa8\xe6\x9b\x9c\xe6\x97\xa5\x0a"    // Thursday
    "\xe9\x87\x91\x0a" "\xe9\x87\x91\xe6\x9b\x9c\xe6\x97\xa5\x0a"    // Friday
    "\xe5\x9c\x9f\x0a" "\xe5\x9c\x9f\xe6\x9b\x9c\xe6\x97\xa5\x0a";   // Saturday
| |
| const struct { |
| const char* name; |
| const char* buffer; |
| } locales [] = { |
| { "ja_JP.UTF-8" , wd_utf8 }, |
| { "ja_JP.Shift_JIS", wd_sjis }, |
| { "ja_JP.EUC-JP" , wd_eucjp } |
| }; |
| |
// Value-initialized conversion state; the conversion functions copy it to
// start every conversion from a fresh state.
std::mbstate_t init = std::mbstate_t ();
| |
| // Conversion from external representation of characters to internal |
| // representation of characters and back to external representation |
| // and checking of the transformation correctness after round-trip. |
| void do_roundtrip_conversion (const char* locname, const char* buffer) |
| { |
| typedef std::codecvt_byname<wchar_t,char,std::mbstate_t> wcodecvt_byname_t; |
| |
| wchar_t wtmp [TMP_BUFFER_SIZE]; |
| char tmp [TMP_BUFFER_SIZE]; |
| |
| std::mbstate_t state = init; |
| std::codecvt_base::result res; |
| |
| // create the named facet |
| std::locale loc (locname); |
| const wcodecvt_byname_t& cc = |
| std::use_facet<wcodecvt_byname_t> (loc); |
| |
| // set the pointers |
| const char* cpe = buffer; |
| const char* cpen = buffer; |
| const char* cpe_limit = buffer + std::strlen (buffer); |
| |
| wchar_t* pi = wtmp; |
| wchar_t* pin = wtmp; |
| wchar_t* pi_limit = wtmp + TMP_BUFFER_SIZE; |
| |
| // convert characters in external buffer to internal representation |
| res = cc.in (state, cpe, cpe_limit, cpen, pi, pi_limit, pin); |
| |
| // set the pointers and adjust the pi_limit pointer to after |
| // the last successfully converted character |
| const wchar_t* cpi = pi; |
| const wchar_t* cpin = pi; |
| const wchar_t* cpi_limit = pin; |
| |
| char* pe = tmp; |
| char* pen = tmp; |
| char* pe_limit = tmp + TMP_BUFFER_SIZE; |
| |
| // convert the characters in internal representation to external |
| // representation and compare the result with the original buffer |
| res = cc.out (state, cpi, cpi_limit, cpin, pe, pe_limit, pen); |
| std::cout << locname << " -> INT -> " << locname << '\n'; |
| |
| std::cout << " Size comparison of buffers yields " |
| << (std::codecvt_base::ok == res && (cpen - cpe == pen - pe) |
| ? "equal\n" : "not equal\n"); |
| |
| const int cmp = std::memcmp (cpe, pe, std::min ((cpen - cpe), (pen - pe))); |
| std::cout << " Content comparison of buffers yields " |
| << (cmp ? "not equal\n" : "equal\n"); |
| } |
| |
| |
| void do_diff_conversion () |
| { |
| typedef std::codecvt_byname<wchar_t,char,std::mbstate_t> wcodecvt_byname_t; |
| |
| wchar_t wtmp [TMP_BUFFER_SIZE]; |
| char tmp [TMP_BUFFER_SIZE]; |
| |
| std::codecvt_base::result res; |
| |
| // Conversion from external representation of characters to internal |
| // representation of characters using two different facets obtained |
| // from two different locales: |
| // 1. External to internal: codecvt_byname facet from ja_JP.EUC-JP locale |
| // transforms the content of wd_eucjp to internal representation; |
| // 2. Internal to external: codecvt_byname facet from ja_JP.UTF-8 locale |
| // transforms the internal representation of wd_eucjp to an external |
| // representation that uses UTF-8 encoding; |
| // 3. Compare the size and content of the final result with size and |
| // content of the wd_utf8 buffer. |
| const std::locale loc1 ("ja_JP.EUC-JP"); |
| const std::locale loc2 ("ja_JP.UTF-8"); |
| |
| const wcodecvt_byname_t& cc1 = std::use_facet<wcodecvt_byname_t> (loc1); |
| const wcodecvt_byname_t& cc2 = std::use_facet<wcodecvt_byname_t> (loc2); |
| |
| std::mbstate_t state1 = init; |
| std::mbstate_t state2 = init; |
| |
| const std::size_t len = std::strlen (wd_utf8); |
| |
| // set the pointers |
| const char* cpe = wd_eucjp; |
| const char* cpen = wd_eucjp; |
| const char* cpe_limit = wd_eucjp + std::strlen (wd_eucjp); |
| |
| wchar_t* pi = wtmp; |
| wchar_t* pin = wtmp; |
| wchar_t* pi_limit = wtmp + TMP_BUFFER_SIZE; |
| |
| // convert external buffer to internal representation |
| res = cc1.in (state1, cpe, cpe_limit, cpen, pi, pi_limit, pin); |
| |
| // set the pointers and adjust the pi_limit pointer to after |
| // the last converted character |
| const wchar_t* cpi = pi; |
| const wchar_t* cpin = pi; |
| const wchar_t* cpi_limit = pin; |
| |
| char* pe = tmp; |
| char* pen = tmp; |
| char* pe_limit = tmp + TMP_BUFFER_SIZE; |
| |
| res = cc2.out (state2, cpi, cpi_limit, cpin, pe, pe_limit, pen); |
| |
| std::cout << "\nEUC-JP -> INT -> UTF-8 conversion\n"; |
| std::cout << " Size comparison of buffers yields " |
| << (std::codecvt_base::ok == res && std::size_t (pen - pe) == len |
| ? " " : "not ") |
| << "equal\n"; |
| |
| const int cmp = |
| std::memcmp (pe, wd_utf8, std::min ((pen - pe), std::ptrdiff_t (len))); |
| |
| std::cout << " Content comparison of buffers yields " |
| << (cmp ? "not equal\n" : "equal\n"); |
| } |
| |
| |
| int main () |
| { |
| try { |
| for (int i = 0; i != sizeof locales / sizeof *locales; i++) |
| do_roundtrip_conversion (locales [i].name, locales [i].buffer); |
| |
| do_diff_conversion (); |
| } |
| catch (const std::exception &e) { |
| std::cout << "Caught an exception: " << e.what () << std::endl; |
| |
| return 1; // Indicate failure. |
| } |
| catch (...) { |
| std::cout << "Caught an unknown exception" << std::endl; |
| |
| return 1; // Indicate failure. |
| } |
| |
| return 0; |
| } |