blob: 3835e00e155e40ac86b2a7c5b5c9001f649bad3a [file] [log] [blame]
* codecvt.cpp - test exercising file streams and code conversion
* $Id$
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* implied. See the License for the specific language governing
* permissions and limitations under the License.
* Copyright 2001-2006 Rogue Wave Software.
// basic_ofstream<>::overflow() mangles a multibyte sequence when using
// code conversion. This functionality of the class isn't currently (as
// of the date of the creation of the test) being exercised by our
// testsuite due to the lack of a suitable locale (e.g., ja_JP).
// test creates a temporary file and fills it with series of pairs
// <offset, seq>, where `offset' is the offset from the beginning of
// the file to the first (decimal) digit of offset, and `seq' is
// a character in the range [1, CHAR_MAX] possibly converted to an
// escape or trigraph sequence according to the rules described
// in 2.3 and 2.13.2
// the program then exercises the stream's (actually, the stream
// file buffer's) ability to extract and seek within such a file
// thus testing the stream's ability to correctly interact with
// the codecvt facet installed in the imbued locale
#include <rw/_defs.h>
#if defined (__IBMCPP__) && !defined (_RWSTD_NO_IMPLICIT_INCLUSION)
// disable implicit inclusion to work around a limitation
// in IBM VisualAge
#include <fstream>
#include <sstream>
#include <climits> // for UCHAR_MAX
#include <cstdio> // for fclose(), fopen(), fseek(), size_t
#include <cstdlib> // for abort()
#include <cstring> // for memset(), strcmp(), strlen()
#include <cwchar> // for mbstate_t
#include <driver.h>
#include <file.h>
// code conversion facet suitable for replacement of the default
// codecvt<char, char, mbstate_t> facet
// cformat::do_out() converts printable ASCII characters into themselves,
// control characters are converted to standard C escape sequences
// cformat::do_in() reverses the effect of do_out()
// cformat: code conversion facet built on std::codecvt<char, char,
// std::mbstate_t>; the constructor stores a bitmask of the flags below
// in `mask', which do_out() and do_in() consult.
// NOTE(review): this copy of the source shows no braces or access
// specifiers for the class -- confirm the structure against upstream.
class cformat: public std::codecvt <char, char, std::mbstate_t>
// NOTE(review): do_out() tests most of these bits as `!(mask & flag)',
// so a set bit appears to suppress the conversion its comment names;
// `hex' is tested as `mask & hex' and picks hex over octal notation.
enum {
new_line = 0x0001, // convert '\n' to "\n"
horizontal_tab = 0x0002, // convert '\t' to "\t"
vertical_tab = 0x0004, // convert '\v' to "\v"
backspace = 0x0008, // convert '\b' to "\b"
carriage_return = 0x0010, // convert '\r' to "\r"
form_feed = 0x0020, // convert '\f' to "\f"
alert = 0x0040, // convert '\a' to "\a"
backslash = 0x0080, // convert '\\' to "\\"
question_mark = 0x0100, // convert '?' to "\?"
single_quote = 0x0200, // convert '\\'' to "\'"
double_quote = 0x0400, // convert '"' to "\""
trigraphs = 0x8000, // convert to/from trigraph sequences
hex = 0x1000 // hex notation in external representation
const int mask; // bitmap of the flags above
// `ref' is forwarded to the std::codecvt base, `m' initializes `mask'
cformat (std::size_t ref = 0, int m = 0)
: std::codecvt<char, char, std::mbstate_t> (ref),
mask (m) { /* empty */ }
// internal -> external conversion (defined out of line)
virtual result
do_out (state_type&,
const intern_type*, const intern_type*,
const intern_type*&,
extern_type*, extern_type*, extern_type*&) const;
// external -> internal conversion (defined out of line)
virtual result
do_in (state_type&, const extern_type*,
const extern_type*, const extern_type*&,
intern_type*, intern_type*, intern_type*&) const;
// always reports noconv and writes nothing
virtual result
do_unshift (state_type&, extern_type*,
extern_type*, extern_type*&) const {
// stateless encoding, no conversion necessary
return noconv;
virtual int
do_encoding () const _THROWS (()) {
return 0; // variable number of external chars per single internal
virtual bool
do_always_noconv () const _THROWS (()) {
return false; // conversion always necessary
// returns the maximum `N' of extern chars in the range [from, from_end)
// such that N represents max or fewer internal chars
virtual int
do_length (state_type&, const extern_type*,
const extern_type*, std::size_t) const;
// returns the max value do_length (s, from, from_end, 1) can return
// for any valid range [from, from_end) - see LWG issue 74 (a DR)
virtual int
do_max_length () const _THROWS (()) {
// assume that an internal char occupies at most 4 external chars
// this won't hold for e.g. '\x00001' etc., but will hold for all
// chars in the hex notation of up to two digits and all chars in
// octal notation (which are required to fit in 4 by the standard)
return 4;
// Converts the internal characters in [from, from_end) to their external
// form in [to, to_end): characters below ' ' or above '~' become backslash
// escapes (named, octal, or hex when `mask & hex'), selected printable
// characters are backslash-escaped or turned into trigraphs, and anything
// else is copied unchanged.
// The result starts out as noconv, becomes ok once any escape or trigraph
// has been emitted, and becomes partial when the destination runs out of
// room. When it is still noconv at the end, from_next and to_next are
// reset to from and to.
// NOTE(review): the return-type line, braces, and `break'/`default'
// statements are not visible in this copy of the source.
cformat::do_out ( state_type& /* unused */,
const intern_type *from,
const intern_type *from_end,
const intern_type *&from_next,
extern_type *to,
extern_type *to_end,
extern_type *&to_next) const
// assert, p1 preconditions
rw_assert (from <= from_end, __FILE__, __LINE__,
"codecvt::do_out (..., from = %#p, from + %d, %#p, "
"to = %#p, to + %d, %#p): from <= from_end",
from, from_end - from, from_next, to, to_end - to, to_next);
rw_assert (to <= to_end, __FILE__, __LINE__,
"codecvt::do_out (..., from = %#p, from + %d, %#p, "
"to = %#p, to + %d, %#p): to <= to_end",
from, from_end - from, from_next, to, to_end - to, to_next);
// assume no conversion will be performed
result res = noconv;
for (from_next = from, to_next = to; from_next != from_end; ++from_next) {
// out of space
if (to_next == to_end) {
res = partial;
// convert to unsigned to make sure comparison works
unsigned char ch = *from_next;
// `esc' stays 0 for a plain copy, becomes '\\' for a backslash
// escape, or '?' for a trigraph sequence
extern_type esc = extern_type ();
if (ch < ' ') {
// convert to a C escape sequence
// (each case applies only when its flag bit is clear in `mask')
switch (ch) {
case '\a':
if (!(mask & alert)) {
ch = 'a';
esc = '\\';
case '\b':
if (!(mask & backspace)) {
ch = 'b';
esc = '\\';
case '\t':
if (!(mask & horizontal_tab)) {
ch = 't';
esc = '\\';
case '\n':
if (!(mask & new_line)) {
ch = 'n';
esc = '\\';
case '\v':
if (!(mask & vertical_tab)) {
ch = 'v';
esc = '\\';
case '\f':
if (!(mask & form_feed)) {
ch = 'f';
esc = '\\';
case '\r':
if (!(mask & carriage_return)) {
ch = 'r';
esc = '\\';
// NOTE(review): '\\' is not below ' ', so this case looks unreachable
// inside the enclosing `ch < ' '' test
case '\\':
if (!(mask & backslash)) {
ch = '\\';
esc = '\\';
// NOTE(review): presumably the `default' branch -- `ch' is left as is
// so the numeric (octal/hex) form is emitted further below
esc = '\\';
else if (ch > '~') {
// convert to a C escape sequence (octal)
esc = '\\';
else {
// escape special characters
switch (ch) {
case '?':
if (!(mask & question_mark))
esc = '\\';
case '\'':
if (!(mask & single_quote))
esc = '\\';
case '"':
if (!(mask & double_quote))
esc = '\\';
case '\\':
if (!(mask & backslash))
esc = '\\';
if (!(mask & trigraphs)) {
// convert to a trigraph sequence
// (`ch' becomes the third character of the "??x" sequence)
switch (ch) {
case '#': ch = '='; esc = '?'; break;
case '\\': ch = '/'; esc = '?'; break;
case '^': ch = '\''; esc = '?'; break;
case '[': ch = '('; esc = '?'; break;
case ']': ch = ')'; esc = '?'; break;
case '|': ch = '!'; esc = '?'; break;
case '{': ch = '<'; esc = '?'; break;
case '}': ch = '>'; esc = '?'; break;
case '~': ch = '-'; esc = '?'; break;
// process `ch' and `esc'
if ('\\' == esc) {
// conversion was performed
res = ok;
if (ch < ' ' || ch > '~') {
// need room for an escape followed by three octal digits
// (the hex form "\xhh" written below also takes four chars)
if (4 > to_end - to_next) {
res = partial;
static const char digits[] = "0123456789abcdef";
// add an escape character
*to_next++ = esc;
if (mask & hex) {
// add hex representation (exactly three chars)
*to_next++ = 'x';
*to_next++ = digits [(ch & 0xf0) >> 4];
*to_next++ = digits [ch & 0xf];
else {
// add octal representation (exactly three digits)
*to_next++ = digits [(ch & (7 << 6)) >> 6];
*to_next++ = digits [(ch & (7 << 3)) >> 3];
*to_next++ = digits [ch & 7];
else {
// need room for an escape followed by a single char
if (2 > to_end - to_next) {
res = partial;
// add an escape char followed by the escaped char
*to_next++ = esc;
*to_next++ = ch;
else if ('?' == esc) {
// need room for a trigraph sequence
if (3 > to_end - to_next) {
res = partial;
// conversion was performed
res = ok;
// add a trigraph sequence
*to_next++ = '?';
*to_next++ = '?';
*to_next++ = ch;
else {
// not escaped
*to_next++ = ch;
if (noconv == res) {
// p2, Note: no conversion was necessary
from_next = from;
to_next = to;
// sanity-check that both output pointers stayed inside their ranges
rw_fatal (from_next >= from && from_next <= from_end, 0, __LINE__,
"user-defined codecvt: internal inconsistency");
rw_fatal (to_next >= to && to_next <= to_end, 0, __LINE__,
"user-defined codecvt: internal inconsistency");
return res;
// Converts the external sequence [from, from_end) back to internal
// characters: backslash introduces a hex ("\x..."), octal, or named C
// escape; "??x" is treated as a trigraph when the `trigraphs' bit is
// clear in `mask'; every other character is taken as is.
// Returns error for a malformed escape, partial when the input ends in
// the middle of a numeric escape or trigraph, and `res' (initially ok)
// otherwise.
// NOTE(review): the return-type line, braces, `break' statements and
// possibly other single-statement lines (e.g. an increment of to_next)
// are not visible in this copy of the source.
cformat::do_in ( state_type& /* unused */,
const extern_type *from,
const extern_type *from_end,
const extern_type *&from_next,
intern_type *to,
intern_type *to_end,
intern_type *&to_next) const
// assert, p1 preconditions
rw_assert (from <= from_end, __FILE__, __LINE__,
"codecvt::do_in (..., from = %#p, from + %d, %#p, "
"to = %#p, to + %d, %#p): from <= from_end",
from, from_end - from, from_next, to, to_end - to, to_next);
rw_assert (to <= to_end, __FILE__, __LINE__,
"codecvt::do_in (..., from = %#p, from + %d, %#p, "
"to = %#p, to + %d, %#p) to <= to_end",
from, from_end - from, from_next, to, to_end - to, to_next);
result res = ok;
for (from_next = from, to_next = to; from_next != from_end; ++from_next) {
unsigned char ch = *from_next;
// `c' accumulates the converted internal character
intern_type c = intern_type ();
if ('\\' == ch) {
// a lone backslash at the very end of the input
if (2 > from_end - from_next) {
// ok is the correct value to return in this case,
// but partial should be handled as well for robustness
// (the choice depends on whether the address value of from_end is odd)
res = (from_end - (extern_type*)0) % 2 ? ok : partial;
ch = from_next [1];
if ('x' == ch) {
// interpret a hex escape sequence
// advance past '\x'
const extern_type *next = from_next + 2;
// parse hex digits until a non-hex digit is encountered
for (; ; ++next) {
if (next == from_end) {
// do not advance to the end since there may be
// more digits following it (e.g., '\x012' with
// from_end pointing at '1' or '2')
return partial;
ch = *next;
if (ch >= '0' && ch <= '9')
c = (c << 4) | (ch - '0');
else if (ch >= 'a' && ch <= 'f')
c = (c << 4) | (ch - 'a' + 10);
else if (ch >= 'A' && ch <= 'F')
c = (c << 4) | (ch - 'A' + 10);
// at least one hex digit has been consumed
else if (next - from_next > 2)
else {
return error; // non-hex digit immediately after '\x'
// advance to the end of parsed number
from_next = next - 1;
else if ('0' <= ch && '7' >= ch) {
// interpret an octal escape sequence
// (tentatively) advance past '\'
const extern_type *next = from_next + 1;
// parse at most three oct digits
for (; next - from_next < 4; ++next) {
if (next == from_end) {
// do not advance to the end since there may be
// more digits following it (e.g., '\x012' with
// from_end pointing at '1' or '2')
return partial;
ch = *next;
if (ch >= '0' && ch <= '7')
c = (c << 3) | (ch - '0');
else if (next - from_next)
else {
// advance to the offending char
from_next = next;
return error; // non-oct digit immediately after '\'
// advance to the end of parsed number
from_next = next - 1;
else {
// interpret standard C escape sequence
switch (ch) {
case 'a': c = '\a'; break;
case 'b': c = '\b'; break;
case 't': c = '\t'; break;
case 'n': c = '\n'; break;
case 'v': c = '\v'; break;
case 'f': c = '\f'; break;
case 'r': c = '\r'; break;
// optional but allowed and escaped backslash
case '?': case '"': case '\'': case '\\': c = ch ; break;
// bad escape sequence
default: return error;
// advance past the initial '\'
else if ('?' == ch && !(mask & trigraphs)) {
// (try to) convert a trigraph sequence
// not enough input left to tell whether this is a trigraph
if ( 2 > from_end - from_next
|| '?' == from_next [1] && 3 > from_end - from_next) {
res = partial;
if ('?' == from_next [1]) {
// "??" (potentially) introduces a trigraph sequence
switch (from_next [2]) {
case '=': c = '#'; break;
case '/': c = '\\'; break;
case '\'': c = '^'; break;
case '(': c = '['; break;
case ')': c = ']'; break;
case '!': c = '|'; break;
case '<': c = '{'; break;
case '>': c = '}'; break;
case '-': c = '~'; break;
// not a trigraph sequence, won't convert
c = from_next [0]; // i.e., '?'
// skip the leading "??" of a trigraph sequence
if (c != from_next [0])
from_next += 2;
// ordinary (not escaped) character
c = ch;
// ordinary (not escaped) character
c = ch;
// to_next may be 0 (when called from do_length())
// doing pointer math on invalid pointers (null) has undefined behavior
// but will probably work in most cases
if (to_next)
*to_next = c;
// in case one of the inner loops has reached end
if (from_next == from_end)
// sanity-check the output pointers; a null to_end is tolerated
rw_fatal (from_next >= from && from_next <= from_end, 0, __LINE__,
"user-defined codecvt: internal inconsistency");
rw_fatal (to_next >= to && (to_next <= to_end || !to_end), 0, __LINE__,
"user-defined codecvt: internal inconsistency");
return res;
// Runs do_in() over [from, from_end) with a null destination of `max'
// elements and a zeroed local state, then returns the distance of the
// resulting to_next from a null pointer.
// NOTE(review): the class comment (and std::codecvt::do_length) describe
// the result as a count of *external* chars, i.e. from_next - from; this
// returns a to_next-based value instead -- confirm which is intended.
// NOTE(review): the return-type line and braces are not visible in this
// copy of the source.
cformat::do_length (state_type&,
const extern_type *from,
const extern_type *from_end,
std::size_t max) const
const extern_type *from_next;
intern_type *to_next = 0;
// the state argument is ignored; a fresh zeroed state is passed on
std::mbstate_t st;
std::memset (&st, 0, sizeof st);
// use do_in() with `to' of 0 to do the computation
// doing pointer math on invalid pointers (null) has undefined behavior
// but will probably work in most cases
do_in (st, from, from_end, from_next,
to_next, to_next + max, to_next);
return to_next - (intern_type*)0;
// Determines the size of the file named `fname' in bytes by seeking
// to its end. Returns -1 when the file cannot be opened or positioned.
// The stream is closed on every path, including a failed seek.
static std::streamsize
fsize (const char *fname)
{
    std::FILE* const f = std::fopen (fname, "r");

    if (!f)
        return -1;

    // ftell() reports failure as -1L, which doubles as our error value
    const std::streamsize size =
        std::fseek (f, 0, SEEK_END) ? std::streamsize (-1)
                                    : std::streamsize (std::ftell (f));

    std::fclose (f);

    return size;
}
// Self-test of the cformat facet: converts the 256-element internal
// array intrn[0] (values '\1'..'\377' plus a trailing '\0') to external
// form with out(), converts the result back in small steps, and checks
// that the round trip reproduces the original array.
// NOTE(review): braces (and any `break') are not visible in this copy
// of the source.
static void
self_test ()
rw_info (0, __FILE__, __LINE__,
"user-defined codecvt facet -- self test");
// names of the codecvt_base::result values, used in diagnostics below
static const char* const result[] = {
"ok", "partial", "error", "noconv"
// user-defined code conversion facet
cformat fmt (1);
// original array of internal characters and one to which to convert
// an external representation back to (for comparison)
cformat::intern_type intrn [2][256] = { { '\0' } };
// array of external chars large enough to hold the internal array
// each internal char converts to at most 4 external chars
cformat::extern_type extrn [1024] = { '\0' };
// fill internal array with chars from '\1' to '\377'
for (std::size_t i = 0; i != sizeof intrn [0] - 1; ++i)
intrn [0][i] = cformat::intern_type (i + 1);
const cformat::intern_type *intrn_next_0 = 0;
cformat::intern_type *intrn_next_1 = 0;
cformat::extern_type *extrn_next = 0;
// dummy (state not used, conversion is stateless)
std::mbstate_t st;
std::memset (&st, 0, sizeof st);
// convert internal to external representation, substituting
// escape sequences for non-printable characters
std::codecvt_base::result res;
// convert array in internal representation to external representation
res = fmt.out (st,
intrn [0], intrn [0] + sizeof intrn [0], intrn_next_0,
extrn, extrn + sizeof extrn, extrn_next);
rw_assert (std::codecvt_base::ok == res, 0, __LINE__,
"codecvt::out (); result == codecvt_base::ok, "
"got codecvt_base::%s", result [res]);
// assert that the external sequence is longer than the internal one
rw_assert (extrn_next - extrn > intrn_next_0 - intrn [0], 0, __LINE__,
"codecvt::out (); converted size %d, expected > %d",
extrn_next - extrn, intrn_next_0 - intrn [0]);
// convert external to internal representation, parsing
// multi-char escape sequences into single chars
// (`next' walks the external array; intern_type and extern_type
// are both char for this facet)
const cformat::intern_type *next = extrn;
intrn_next_1 = intrn [1];
for (; next != extrn_next; ) {
// allow only a small buffer space to exercise partial conversion
std::size_t step = std::size_t (extrn_next - next);
if (step > 12)
step = 5 + step % 8;
// NOTE(review): the call target appears to be missing from this line;
// judging by the diagnostics below it is presumably `fmt.in' -- confirm
res = (st,
next, next + step, next,
intrn_next_1, intrn [1] + sizeof intrn [1], intrn_next_1);
if (std::codecvt_base::error == res)
// assert that entire sequence converted ok
rw_assert (std::codecvt_base::ok == res, 0, __LINE__,
"codecvt::in (); result == codecvt_base::ok, got "
"codecvt_base::%s at offset %d", result [res], next - extrn);
// the whole destination array must have been filled
rw_assert (intrn_next_1 == intrn [1] + sizeof intrn [1], 0, __LINE__,
"codecvt::in (); to_next == %#p, got %#p",
intrn [1] + sizeof intrn [1], intrn_next_1);
// round trip must reproduce the original internal sequence
rw_assert (0 == std::strcmp (intrn [0], intrn [1]), 0, __LINE__,
"codecvt<>::out/in ()");
// asserts that the stream's rdstate() equals `state', reporting both
// the expected and the actual state on mismatch
#define RW_ASSERT_STATE(strm, state) \
rw_assert ((strm).rdstate () == (state), 0, __LINE__, \
"rdstate () == %{Is}, got %{Is}", \
(state), (strm).rdstate ())
static void
test_noconv (const char *fname)
rw_info (0, 0, __LINE__, "ifstream extraction without conversion");
std::ifstream f (fname);
// make sure file stream has been successfully opened
RW_ASSERT_STATE (f, std::ios::goodbit);
// gain public access to protected members
struct pubbuf: std::streambuf {
// working around an MSVC 6.0 bug (PR #26330)
typedef std::streambuf Base;
virtual std::streamsize showmanyc () {
return Base::showmanyc ();
char* pubgptr () {
return Base::gptr ();
char* pubegptr () {
return Base::egptr ();
// use static cast through void* to avoid using reinterpret_cast
pubbuf *rdbuf = _RWSTD_STATIC_CAST (pubbuf*, (void*)f.rdbuf ());
std::streamsize filesize = rdbuf->showmanyc ();
// exercise, p1: showmanyc() returns the "estimated"
// size of the sequence (i.e., the file size in this case)
rw_assert (filesize == fsize (fname), 0, __LINE__,
"streambuf::showmanyc () == %ld, got %ld",
fsize (fname), filesize);
// exercise, p1
filesize = f.rdbuf ()->in_avail ();
const char *gptr = rdbuf->pubgptr ();
const char *egptr = rdbuf->pubgptr ();
rw_assert (filesize == (gptr < egptr ? egptr - gptr : fsize (fname)),
0, __LINE__,
"streambuf::in_avail () == %ld, got %ld",
(gptr < egptr ? egptr - gptr : fsize (fname)), filesize);
// allocate buffer large enough to accomodate the converted
// (i.e. internal) sequence
const std::size_t bufsize = 0x10000; // 64k should do it
char *tmpbuf = new char [bufsize];
// fill with non-0 value to check for writes past the end
// (see also Onyx incident 14033)
std::memset (tmpbuf, '\x7f', bufsize);
// ecercise putback area
std::streamsize i;
# define _RWSTD_PBACK_SIZE 1
// _RWSTD_PBACK_SIZE is the size of the putback area the library
// was configured with; the macro expands to streamsize (N)
for (i = 0; i != _RWSTD_PBACK_SIZE + 1; ++i) {
// read a few characters, read must not append a '\0'
std::streamsize n = (tmpbuf, i).gcount ();
// assert that read exactly `i' chars, buffer not null-terminared
rw_assert (i == n && '\x7f' == tmpbuf [i], 0, __LINE__,
"ifstream::read (%#p, %ld) read %ld,"
"buffer terminated with '\\%03o'",
tmpbuf, i, n, tmpbuf [i]);
// put back read characters; assert that they are the same
// as those in the corresponding positions in the buffer
for (std::streamsize j = 0; j != i; ++j) {
std::ifstream::int_type c = f.rdbuf ()->sungetc ();
typedef std::ifstream::traits_type Traits;
rw_assert (Traits::to_int_type (tmpbuf [n - j - 1]) == c,
0, __LINE__,
"filebuf::sungetc() == '\\%03o', got '\\%03o",
tmpbuf [n - j - 1], c);
// re-read characters just put back
char buf [_RWSTD_PBACK_SIZE + 1];
std::memset (buf, '\x7f', sizeof buf);
std::streamsize n2 = (buf, i).gcount ();
// assert that the requested number of chars were read in
rw_assert (i == n2 && '\x7f' == buf [i], 0, __LINE__,
"ifstream::read (%#p, %ld) read %ld,"
"buffer terminated with '\\%03o'",
buf, i, n2, buf [i]);
// assert that the read chars are those that were put back
for (std::streamsize k = 0; k != i; ++k) {
rw_assert (buf [k] == tmpbuf [k], 0, __LINE__,
"buffer mismatch at offset %ld: got '\\%03o', "
"expected '\\%03o'", k, buf [k], tmpbuf [k]);
// put character back again so that it can be read back in
f.rdbuf ()->sungetc ();
// read file contents into buffer (apply no conversion)
const std::streamsize n = (tmpbuf, bufsize).gcount ();
//, p28 - read() sets eofbit | failbit
// if end-of-file occurs on the input sequence
RW_ASSERT_STATE (f, std::ios::eofbit | std::ios::failbit);
// assert that the entiire file has been read
rw_assert (n == filesize && '\x7f' == tmpbuf [n], 0, __LINE__,
"ifstream::read (%#p, %ld) read %ld, expected %ld; "
"buffer terminated with '\\%03o' [%s]",
tmpbuf, bufsize, n, filesize, tmpbuf [n], fname);
tmpbuf [n] = '\0';
// assert that file contains no control characters
bool b = true;
for (i = 0; b && i != UCHAR_MAX + 1; ++i) {
if (i >= ' ' || i <= '~')
b = 0 == std::strchr (tmpbuf, char (i));
rw_assert (b, 0, __LINE__,
"unescaped non-printable character '\\#03o' at offset %ld",
tmpbuf [i], i);
delete[] tmpbuf;
// Writes a text file containing an invalid escape sequence, reads it
// back through an ifstream imbued with the cformat facet, and checks
// the stream state and gcount() after the conversion error.
// NOTE(review): braces are not visible in this copy of the source, and
// several statements appear to have been lost (see the notes below).
static void
test_error (const char *fname)
rw_info (0, 0, __LINE__, "ifstream extraction with a conversion error");
// NOTE(review): the initializer of `outbuf' is not visible here; from the
// checks below it presumably starts with "a..z", a space, and then an
// invalid hex escape -- confirm against upstream
const char outbuf[] = {
// ^
// |
// error (invalid hex sequence) ---+
// write out a text file containing a conversion error
std::ofstream ostrm (fname);
ostrm << outbuf;
ostrm.close ();
// read the file back in using the conversion facet
std::ifstream istrm (fname);
// user-defined code conversion facet
const cformat fmt (1 /* prevent locale from deleting */);
// create a locale by combining the classic locale and our UD facet
// cformat; the facet will replace std::codecvt<char, char, mbstate_t>
std::locale l (std::locale::classic (), &fmt);
// imbue locale with formatting facet into streams and save previous
l = istrm.imbue (l);
char inbuf [sizeof outbuf * 4] = { 0 };
// NOTE(review): the read call itself (presumably `istrm.read') appears
// to have been folded into the comment below -- confirm
// try to read partial contents of the file
// including the conversion error into the buffer (inbuf, 26L + 4L /* "a..z" <space> <error> "AB" */);
// verify that the operation failed, eofbit is set since
// less than the requested number of characters have been read
RW_ASSERT_STATE (istrm, std::ios::eofbit | std::ios::failbit);
// verify that the 26 characters 'a' through 'z' plus
// the space (i.e., 27 chars) have been extracted
rw_assert (27 == istrm.gcount (), 0, __LINE__,
"ifstream::read () extracted %d, expected 27",
istrm.gcount ());
rw_assert ( 0 == std::ifstream::traits_type::compare (inbuf, outbuf, 26)
&& ' ' == inbuf [26], 0, __LINE__,
"ifstream::read () got \"%s\", expected \"%.26s \"",
inbuf, outbuf);
// reset the error state before retrying
istrm.clear ();
// NOTE(review): as above, the second read call appears to have been
// folded into the comment below -- confirm
// try to read again, and verify that the operation fails (inbuf, sizeof inbuf);
RW_ASSERT_STATE (istrm, std::ios::eofbit | std::ios::failbit);
rw_assert (0 == istrm.gcount (), 0, __LINE__,
"ifstream::read () extracted %d, expected 0",
istrm.gcount ());
// replace the imbued locale
// (`l' holds the locale returned by the earlier imbue() call, so the
// stream no longer refers to the local facet `fmt')
istrm.imbue (l);
// Exercises seekg()/tellg()/pubsync()/in_avail() on an ifstream opened
// on `fname' and imbued with the cformat facet. When `bufsize' is not
// size_t (-1) the stream buffer is first resized with pubsetbuf (0,
// bufsize). For a series of delimiter characters starting at '~' the
// test skips to the delimiter, reads the following space, and checks
// that the offset stored in the file equals tellg(), both before and
// after rewinding and seeking back.
// NOTE(review): braces are not visible in this copy of the source.
static void
test_seek (const char *fname,
std::size_t bufsize = std::size_t (-1))
std::ifstream f (fname);
// make sure stream has been successfully opened
RW_ASSERT_STATE (f, std::ios::goodbit);
// set buffer size if specified
if (std::size_t (-1) != bufsize) {
rw_info (0, 0, __LINE__,
"ifstream::seekg()/tellg() - %zu byte buffer", bufsize);
RW_ASSERT_STATE (f, std::ios::goodbit);
f.rdbuf ()->pubsetbuf (0, bufsize);
else {
rw_info (0, 0, __LINE__,
"ifstream::seekg()/tellg() - default buffer size");
// user-defined code conversion facet
const cformat fmt (1 /* prevent locale from deleting */);
// create a locale by combining the classic locale and our UD facet
// cformat; the facet will replace std::codecvt<char, char, mbstate_t>
std::locale l (std::locale::classic (), &fmt);
// imbue locale with formatting facet into streams and save previous
l = f.imbue (l);
// seek to the beginning of stream (safe)
f.seekg (0);
RW_ASSERT_STATE (f, std::ios::goodbit);
// number of character values from '~' through UCHAR_MAX, minus one
const unsigned char max = UCHAR_MAX - '~';
for (std::size_t n = 0; n != std::size_t (max - 1); ++n) {
// delimiters are '~', '~' + 1, ... in turn
const char delim = char ('~' + n);
// skip over chars until the terminating delim (and extract it)
f.ignore (0x10000, std::fstream::traits_type::to_int_type (delim));
rw_assert (f.good (), 0, __LINE__,
"istream::ignore (0x10000, '\\%03o'); "
"rdstate() = %{Is}, gcount() = %ld",
delim, f.rdstate (), f.gcount ());
// alternate between exercising seekg() and pubsync()
if (n % 2) {
// seek inplace (offset must be 0 for MB encodings)
f.seekg (0, std::ios::cur);
RW_ASSERT_STATE (f, std::ios::goodbit);
else {
// filebuf::pubsync() must return 0
int syn = f.rdbuf ()->pubsync ();
rw_assert (0 == syn, 0, __LINE__,
"filebuf::pubsync () == 0, got %d", syn);
// skip exactly one char forward (retrieve a space)
char c = char ();
f.get (c);
RW_ASSERT_STATE (f, std::ios::goodbit);
rw_assert (' ' == c, 0, __LINE__,
"istream::get(char_type) got '\\%03o', expected ' '", c);
// get current file position
const std::ifstream::pos_type pos = f.tellg ();
RW_ASSERT_STATE (f, std::ios::goodbit);
// extract offset - should be the same as pos
long offset = 0;
f >> offset;
RW_ASSERT_STATE (f, std::ios::goodbit);
rw_assert (long (pos) == offset, 0, __LINE__,
"ifstream::operator>>() expected %ld, got %ld",
long (pos), offset);
// in_avail() must return a value > 0
std::streamsize avail = f.rdbuf ()->in_avail ();
rw_assert (avail > 0, 0, __LINE__,
"filebuf::in_avail() expected > 0, got %ld", avail);
// "rewind" stream to the beginning
f.seekg (0);
RW_ASSERT_STATE (f, std::ios::goodbit);
// try seeking to the previous position
f.seekg (pos);
RW_ASSERT_STATE (f, std::ios::goodbit);
rw_assert (f.tellg () == pos, 0, __LINE__,
"istream::seekg (%ld); tellg () returns %ld",
long (pos), long (f.tellg ()));
// re-read offset - should be the same as file pos
f >> offset;
RW_ASSERT_STATE (f, std::ios::goodbit);
rw_assert (long (pos) == offset, 0, __LINE__,
"ifstream::operator>>() expected %ld, got %ld",
long (pos), offset);
// ignore the rest of file, eofbit must be set
f.ignore (0x10000);
RW_ASSERT_STATE (f, std::ios::eofbit);
// in_avail() must return 0
const std::streamsize avail = f.rdbuf ()->in_avail ();
rw_assert (0 == avail, 0, __LINE__,
"filebuf::in_avail() expected 0, got %ld", avail);
// imbue original locale (currently imbued locale
// will be destroyed prior to the destruction of `f')
f.imbue (l);
// Test driver: runs the facet self-test, writes a temporary file of
// "<offset> <char> " records through an ofstream imbued with cformat
// (keeping the unconverted text in `buffer'), then reads the file back
// without and with conversion, exercises seeking with a range of buffer
// sizes, exercises conversion errors, and finally removes the file.
// Returns 1 when no temporary file name could be obtained, 0 otherwise.
// NOTE(review): braces are not visible in this copy of the source.
static int
run_test (int, char*[])
// self-test make sure facet works
self_test ();
// user-defined code conversion facet
const cformat fmt (1 /* prevent locale from deleting */);
// create a locale by combining the classic locale and our UD facet
// cformat; the facet will replace std::codecvt<char, char, mbstate_t>
std::locale l (std::locale::classic (), &fmt);
const char *fname = rw_tmpnam (0);
if (!fname)
return 1;
// will be populated with file offsets and escape sequences
char buffer [4096] = { '\0' };
int buflen = 0;
// generate file contents using UD conversion
if (1) {
rw_info (0, 0, __LINE__,
"ofstream insertion with multibyte conversion");
std::ofstream f (fname);
// make sure file stream has been successfully opened
RW_ASSERT_STATE (f, std::ios::goodbit);
// imbue locale with formatting facet into stream
f.imbue (l);
// one record per character value 1 through UCHAR_MAX
for (std::size_t i = 1; i != UCHAR_MAX + 1U; ++i) {
const std::ofstream::pos_type pos = f.tellp ();
RW_ASSERT_STATE (f, std::ios::goodbit);
buflen = std::strlen (buffer);
// append the file offset followed by a (possibly escaped) char
std::sprintf (buffer + buflen, "%ld %c ", long (pos), char (i));
// write out the just appended portion of the buffer
f << (buffer + buflen);
RW_ASSERT_STATE (f, std::ios::goodbit);
// total length of the unconverted text, compared with the read below
buflen = std::strlen (buffer);
// file contains the contents of buffer with non-printable
// chars replaced with escape sequences (e.g., tabs with '\t', etc.)
// read contents of file w/o conversion
test_noconv (fname);
// read contents of file, apply conversion
if (1) {
rw_info (0, 0, __LINE__,
"ifstream extraction with multibyte conversion");
std::ifstream f (fname);
// make sure file stream has been successfully opened
RW_ASSERT_STATE (f, std::ios::goodbit);
// imbue locale with formatting facet into stream
f.imbue (l);
// allocate buffer large enough to accommodate the converted
// (i.e. internal) sequence
char tmpbuf [sizeof buffer];
// read file contents into buffer, convert escape sequences
// into the corresponding (perhaps unprintable) characters
// NOTE(review): the call target appears to be missing from the next
// line; judging by the diagnostics it is presumably `f.read' -- confirm
const std::streamsize n = (tmpbuf, sizeof tmpbuf).gcount ();
// p28 - read() sets eofbit | failbit
// if end-of-file occurs on the input sequence
RW_ASSERT_STATE (f, std::ios::eofbit | std::ios::failbit);
rw_assert (long (n) == buflen, 0, __LINE__,
"ifstream::read (%#p, %d); read %ld, expected %d",
tmpbuf, sizeof tmpbuf, long (n), buflen);
// assert that converted file contents are the same
// as the originally generated buffer
// (compare only the common prefix of the two sequences)
const long len = long (n) < buflen ? long (n) : buflen;
for (long i = 0; i != len; ++i) {
if (tmpbuf [i] != buffer [i]) {
rw_assert (0, 0, __LINE__,
"'\\%03o' == '\\%03o'; offset %d",
(unsigned char)buffer [i],
(unsigned char)tmpbuf [i], i);
// test with default buffer
test_seek (fname);
// retest with buffer of user-defined size
// (n steps down by 1024, 256, 16, then 1; the loop ends when the
// unsigned counter wraps around past 0 to size_t (-1))
for (std::size_t n = 4096; n != std::size_t (-1);
n -= 1024 < n ? 1024 : 256 < n ? 256 : 16 < n ? 16 : 1)
test_seek (fname, n);
// test with errors during conversion
test_error (fname);
// remove a temporary file
std::remove (fname);
return 0;
// Program entry point: hands the command line to the rw_test() driver
// and returns its result.
// NOTE(review): braces are not visible in this copy of the source, and
// the argument list may be missing entries (no reference to run_test is
// visible here) -- confirm against upstream.
int main (int argc, char *argv[])
return rw_test (argc, argv, __FILE__,
0 /* no comment */,
"", 0);