 // tests/iostream/27.filebuf.codecvt.cpp - stdcxx - Git at Google

 /************************************************************************
  *
  * codecvt.cpp - test exercising file streams and code conversion
  *
  * $Id$
  *
  ************************************************************************
  *
  * Licensed to the Apache Software  Foundation (ASF) under one or more
  * contributor  license agreements.  See  the NOTICE  file distributed
  * with  this  work  for  additional information  regarding  copyright
  * ownership.   The ASF  licenses this  file to  you under  the Apache
  * License, Version  2.0 (the  "License"); you may  not use  this file
  * except in  compliance with the License.   You may obtain  a copy of
  * the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the  License is distributed on an  "AS IS" BASIS,
  * WITHOUT  WARRANTIES OR CONDITIONS  OF ANY  KIND, either  express or
  * implied.   See  the License  for  the  specific language  governing
  * permissions and limitations under the License.
  *
  * Copyright 2001-2006 Rogue Wave Software.
  *
  **************************************************************************/

 // PROBLEM DESCRIPTION:
 //   basic_ofstream<>::overflow() mangles a multibyte sequence when using
 //   code conversion. This functionality of the class isn't currently (as
 //   of the date of the creation of the test) being exercised by our
 //   testsuite due to the lack of a suitable locale (e.g., ja_JP).

 // TEST DESCRIPTION:
 //   test creates a temporary file and fills it with a series of pairs
 //   <offset, seq>, where `offset' is the offset from the beginning of
 //   the file to the first (decimal) digit of offset, and `seq' is
 //   a character in the range [1, CHAR_MAX] possibly converted to an
 //   escape or trigraph sequence according to the rules described
 //   in 2.3 and 2.13.2
 //
 //   the program then exercises the stream's (actually, the stream
 //   file buffer's) ability to extract and seek within such a file
 //   thus testing the stream's ability to correctly interact with
 //   the codecvt facet installed in the imbued locale


 #include <rw/_defs.h>
 #if defined (__IBMCPP__) && !defined (_RWSTD_NO_IMPLICIT_INCLUSION)
    // disable implicit inclusion to work around a limitation
    // in IBM VisualAge
 #  define _RWSTD_NO_IMPLICIT_INCLUSION
 #endif


 #include <fstream>
 #include <sstream>

 #include <climits>   // for UCHAR_MAX
 #include <cstdio>    // for fclose(), fopen(), fseek(), size_t
 #include <cstdlib>   // for abort()
 #include <cstring>   // for memset(), strcmp(), strlen()
 #include <cwchar>    // for mbstate_t

 #include <rw_driver.h>
 #include <rw_file.h>

 /**************************************************************************/

 // cformat: a drop-in replacement for the default
 // codecvt<char, char, mbstate_t> facet
 //   do_out() copies ordinary printable characters unchanged and writes
 //            everything else as a C escape or a trigraph sequence
 //   do_in()  parses such sequences back into single characters
 class cformat: public std::codecvt <char, char, std::mbstate_t>
 {
     // bits recognized in the constructor's mask argument
     // every bit except `hex' turns the named conversion OFF when set:
     // do_out() performs the conversion only while the bit is clear
     // (do_in() consults just the `trigraphs' bit); `hex' selects the
     // "\xHH" notation in place of the default three-digit octal one
     enum {
         new_line         = 0x0001,  // '\n' is not written as "\n"
         horizontal_tab   = 0x0002,  // '\t' is not written as "\t"
         vertical_tab     = 0x0004,  // '\v' is not written as "\v"
         backspace        = 0x0008,  // '\b' is not written as "\b"
         carriage_return  = 0x0010,  // '\r' is not written as "\r"
         form_feed        = 0x0020,  // '\f' is not written as "\f"
         alert            = 0x0040,  // '\a' is not written as "\a"
         backslash        = 0x0080,  // '\\' is not escaped
         question_mark    = 0x0100,  // '?' is not written as "\?"
         single_quote     = 0x0200,  // '\'' is not written as "\'"
         double_quote     = 0x0400,  // '"' is not written as "\""
         trigraphs        = 0x8000,  // no conversion to/from trigraphs
         hex              = 0x1000   // numeric escapes use hex notation
     };

     const int mask;   // bitwise OR of the flags above

 public:

     // `ref' is forwarded to the base class, `m' becomes the flag mask
     explicit
     cformat (std::size_t ref = 0, int m = 0)
         : std::codecvt<char, char, std::mbstate_t> (ref),
           mask (m)
     {
         // nothing else to do
     }

 protected:

     // internal -> external conversion
     virtual result
     do_out (state_type&,
             const intern_type* /* from */,
             const intern_type* /* from_end */,
             const intern_type*& /* from_next */,
             extern_type* /* to */,
             extern_type* /* to_end */,
             extern_type*& /* to_next */) const;

     // external -> internal conversion
     virtual result
     do_in (state_type&,
            const extern_type* /* from */,
            const extern_type* /* from_end */,
            const extern_type*& /* from_next */,
            intern_type* /* to */,
            intern_type* /* to_end */,
            intern_type*& /* to_next */) const;

     // the encoding has no shift state, there is never anything to undo
     virtual result
     do_unshift (state_type&, extern_type*,
                 extern_type*, extern_type*&) const {
         return noconv;
     }

     // 0: an internal character takes a variable number of external ones
     virtual int
     do_encoding () const _THROWS (()) {
         return 0;
     }

     // false: the external and internal representations differ
     virtual bool
     do_always_noconv () const _THROWS (()) {
         return false;
     }

     // returns the maximum `N' of extern chars in the range [from, from_end)
     // such that N represents max or fewer internal chars
     virtual int
     do_length (state_type&,
                const extern_type* /* from */,
                const extern_type* /* from_end */,
                std::size_t /* max */) const;

     // returns the max value do_length (s, from, from_end, 1) can return
     // for any valid range [from, from_end) - see LWG issue 74 (a DR)
     virtual int
     do_max_length () const _THROWS (()) {
         // 4 external chars per internal char is enough for every octal
         // escape and for hex escapes of up to two digits; longer hex
         // forms such as '\x00001' are not accounted for
         return 4;
     }
 };


 // Converts the internal range [from, from_end) into its external form
 // in [to, to_end).  Control characters and characters above '~' become
 // C escape sequences, a handful of printable characters are escaped or
 // turned into trigraphs (subject to the mask), the rest is copied.
 // Returns noconv when nothing needed converting (both *_next are then
 // reset to the start of their range), partial when the output range is
 // too small for the next character, and ok otherwise.
 std::codecvt_base::result
 cformat::do_out (      state_type& /* unused */,
                  const intern_type *from,
                  const intern_type *from_end,
                  const intern_type *&from_next,
                        extern_type *to,
                        extern_type *to_end,
                        extern_type *&to_next) const
 {
     // 22.2.1.5.2, p1: both ranges must be valid
     rw_assert (from <= from_end, __FILE__, __LINE__,
                "codecvt::do_out (..., from = %#p, from + %d, %#p, "
                "to = %#p, to + %d, %#p): from <= from_end",
                from, from_end - from, from_next, to, to_end - to, to_next);

     rw_assert (to <= to_end, __FILE__, __LINE__,
                "codecvt::do_out (..., from = %#p, from + %d, %#p, "
                "to = %#p, to + %d, %#p): to <= to_end",
                from, from_end - from, from_next, to, to_end - to, to_next);

     // characters with a symbolic C escape: the raw value, the mask bit
     // that suppresses the escape, and the letter written after '\'
     // (the backslash entry is kept for parity with the other entries;
     // it is only looked up for values below ' ')
     static const struct {
         unsigned char raw;
         int           bit;
         unsigned char letter;
     } symbolic [] = {
         { '\a', alert,           'a'  },
         { '\b', backspace,       'b'  },
         { '\t', horizontal_tab,  't'  },
         { '\n', new_line,        'n'  },
         { '\v', vertical_tab,    'v'  },
         { '\f', form_feed,       'f'  },
         { '\r', carriage_return, 'r'  },
         { '\\', backslash,       '\\' }
     };

     const std::size_t nsymbolic = sizeof symbolic / sizeof *symbolic;

     // characters replaced by a trigraph and, at the same index,
     // the third character of the corresponding "??x" sequence
     static const char tri_plain [] = "#\\^[]|{}~";
     static const char tri_third [] = "=/'()!<>-";

     static const char digits [] = "0123456789abcdef";

     // nothing has needed converting so far
     result res = noconv;

     from_next = from;
     to_next   = to;

     while (from_next != from_end) {

         // no room left for even a single character
         if (to_next == to_end) {
             res = partial;
             break;
         }

         // unsigned so that the range comparisons below work
         unsigned char ch   = *from_next;

         // '\\' for an escape, '?' for a trigraph, 0 for a plain copy
         extern_type   lead = extern_type ();

         if (ch < ' ') {
             // control character: symbolic escape if there is one and
             // it isn't suppressed, numeric escape if there is none
             std::size_t inx = 0;
             while (inx != nsymbolic && symbolic [inx].raw != ch)
                 ++inx;

             if (inx == nsymbolic) {
                 lead = '\\';
             }
             else if (!(mask & symbolic [inx].bit)) {
                 ch   = symbolic [inx].letter;
                 lead = '\\';
             }
         }
         else if (ch > '~') {
             // always a numeric escape
             lead = '\\';
         }
         else {
             // printable character that may need a backslash...
             bool escape = false;

             if ('?' == ch)
                 escape = !(mask & question_mark);
             else if ('\'' == ch)
                 escape = !(mask & single_quote);
             else if ('"' == ch)
                 escape = !(mask & double_quote);
             else if ('\\' == ch)
                 escape = !(mask & backslash);

             if (escape)
                 lead = '\\';

             // ...or that is written as a trigraph (which takes
             // precedence over the backslash escape chosen above)
             if (!(mask & trigraphs)) {
                 for (std::size_t t = 0; t != sizeof tri_plain - 1; ++t) {
                     if (tri_plain [t] == ch) {
                         ch   = tri_third [t];
                         lead = '?';
                         break;
                     }
                 }
             }
         }

         // write out whatever `ch' and `lead' call for

         if ('\\' == lead) {

             // a conversion takes place
             res = ok;

             const bool numeric = ch < ' ' || ch > '~';

             // '\' plus three digits (or 'x' and two), or '\' plus a char
             const int need = numeric ? 4 : 2;

             if (need > to_end - to_next) {
                 res = partial;
                 break;
             }

             *to_next++ = lead;

             if (!numeric) {
                 // symbolic escape
                 *to_next++ = ch;
             }
             else if (mask & hex) {
                 // 'x' followed by exactly two hex digits
                 *to_next++ = 'x';
                 *to_next++ = digits [(ch & 0xf0) >> 4];
                 *to_next++ = digits [ch & 0xf];
             }
             else {
                 // exactly three octal digits
                 *to_next++ = digits [(ch & (7 << 6)) >> 6];
                 *to_next++ = digits [(ch & (7 << 3)) >> 3];
                 *to_next++ = digits [ch & 7];
             }
         }
         else if ('?' == lead) {

             // a trigraph takes three characters
             if (3 > to_end - to_next) {
                 res = partial;
                 break;
             }

             // a conversion takes place
             res = ok;

             *to_next++ = '?';
             *to_next++ = '?';
             *to_next++ = ch;
         }
         else {
             // plain copy
             *to_next++ = ch;
         }

         ++from_next;
     }

     if (noconv == res) {
         // 22.2.1.5.2, p2, Note: no conversion was necessary
         from_next = from;
         to_next   = to;
     }

     rw_fatal (from_next >= from && from_next <= from_end, 0, __LINE__,
               "user-defined codecvt: internal inconsistency");

     rw_fatal (to_next >= to && to_next <= to_end, 0, __LINE__,
               "user-defined codecvt: internal inconsistency");

     return res;
 }


 // Converts the external range [from, from_end) into internal characters
 // stored in [to, to_end), undoing what do_out() does: backslash escapes
 // (symbolic, octal and hex) and, unless the `trigraphs' bit is set in
 // the mask, trigraph sequences each yield a single internal character.
 //
 // On return from_next designates the first external character that has
 // not been converted and to_next the end of the converted output.
 // Returns
 //   partial  when the output range is full while input remains, or when
 //            the input ends in the middle of a sequence
 //   error    for a malformed escape sequence
 //   ok       otherwise
 std::codecvt_base::result
 cformat::do_in (      state_type& /* unused */,
                 const extern_type *from,
                 const extern_type *from_end,
                 const extern_type *&from_next,
                       intern_type *to,
                       intern_type *to_end,
                       intern_type *&to_next) const
 {
     // assert 22.2.1.5.2, p1 preconditions
     rw_assert (from <= from_end, __FILE__, __LINE__,
                "codecvt::do_in (..., from = %#p, from + %d, %#p, "
                "to = %#p, to + %d, %#p): from <= from_end",
                from, from_end - from, from_next, to, to_end - to, to_next);

     rw_assert (to <= to_end, __FILE__, __LINE__,
                "codecvt::do_in (..., from = %#p, from + %d, %#p, "
                "to = %#p, to + %d, %#p) to <= to_end",
                from, from_end - from, from_next, to, to_end - to, to_next);

     result res = ok;

     // within the loop body from_next is advanced to the LAST character
     // of a multi-character sequence; the increment in the loop header
     // then steps past it
     for (from_next = from, to_next = to; from_next != from_end; ++from_next) {

         // out of room: stop before consuming (or storing) anything else
         // so that nothing is ever written at or beyond to_end
         if (to_next == to_end) {
             res = partial;
             break;
         }

         unsigned char ch = *from_next;
         intern_type c    = intern_type ();

         if ('\\' == ch) {

             if (2 > from_end - from_next) {
                 // ok is the correct value to return in this case,
                 // but partial should be handled as well for robustness
                 // (the parity of the end pointer picks between the two
                 // so that callers get to see both)
                 res = (from_end - (extern_type*)0) % 2 ? ok : partial;
                 break;
             }

             ch = from_next [1];

             if ('x' == ch) {
                 // interpret a hex escape sequence

                 // advance past '\x'
                 const extern_type *next = from_next + 2;

                 // parse hex digits until a non-hex digit is encountered
                 for (; ; ++next) {

                     if (next == from_end) {
                         // do not advance to the end since there may be
                         // more digits following it (e.g., '\x012' with
                         // from_end pointing at '1' or '2')
                         return partial;
                     }

                     ch = *next;
                     if (ch >= '0' && ch <= '9')
                         c = (c << 4) | (ch - '0');
                     else if (ch >= 'a' && ch <= 'f')
                         c = (c << 4) | (ch - 'a' + 10);
                     else if (ch >= 'A' && ch <= 'F')
                         c = (c << 4) | (ch - 'A' + 10);
                     else if (next - from_next > 2)
                         break;   // at least one digit has been parsed
                     else {
                         return error;   // non-hex digit immediately after '\x'
                     }
                 }

                 // advance to the end of parsed number
                 from_next = next - 1;
             }
             else if ('0' <= ch && '7' >= ch) {
                 // interpret an octal escape sequence

                 // (tentatively) advance past '\'
                 const extern_type *next = from_next + 1;

                 // parse at most three octal digits
                 for (; next - from_next < 4; ++next) {

                     if (next == from_end) {
                         // do not advance to the end since there may be
                         // more digits following it (e.g., '\012' with
                         // from_end pointing at '1' or '2')
                         return partial;
                     }

                     ch = *next;
                     if (ch >= '0' && ch <= '7')
                         c = (c << 3) | (ch - '0');
                     else if (next - from_next)
                         break;
                     else {
                         // advance to the offending char
                         from_next = next;
                         return error;   // non-oct digit immediately after '\'
                     }
                 }

                 // advance to the end of parsed number
                 from_next = next - 1;
             }
             else {
                 // interpret standard C escape sequence
                 switch (ch) {
                 case 'a': c = '\a'; break;
                 case 'b': c = '\b'; break;
                 case 't': c = '\t'; break;
                 case 'n': c = '\n'; break;
                 case 'v': c = '\v'; break;
                 case 'f': c = '\f'; break;
                 case 'r': c = '\r'; break;

                 // optional but allowed and escaped backslash
                 case '?': case '"': case '\'': case '\\': c = ch ; break;

                 // bad escape sequence
                 default: return error;
                 }

                 // advance past the initial '\'
                 ++from_next;
             }
         }
         else if ('?' == ch && !(mask & trigraphs)) {
             // (try to) convert a trigraph sequence; more input is needed
             // to tell whether the '?' introduces one
             if (   2 > from_end - from_next
                 || '?' == from_next [1] && 3 > from_end - from_next) {
                 res = partial;
                 break;
             }

             if ('?' == from_next [1]) {

                 // "??" (potentially) introduces a trigraph sequence
                 switch (from_next [2]) {

                 case '=':  c = '#';  break;
                 case '/':  c = '\\'; break;
                 case '\'': c = '^';  break;
                 case '(':  c = '[';  break;
                 case ')':  c = ']';  break;
                 case '!':  c = '|';  break;
                 case '<':  c = '{';  break;
                 case '>':  c = '}';  break;
                 case '-':  c = '~';  break;

                 default:
                     // not a trigraph sequence, won't convert
                     c = from_next [0];   // i.e., '?'
                 }

                 // skip the leading "??" of a trigraph sequence
                 if (c != from_next [0])
                     from_next += 2;
             }
             else
                 // ordinary (not escaped) character
                 c = ch;
         }
         else
             // ordinary (not escaped) character
             c = ch;

         // tolerate a null output range: count but do not store
         // (pointer math on a null pointer has undefined behavior
         // but will probably work in most cases)
         if (to_next)
             *to_next = c;

         ++to_next;

         // in case one of the inner loops has reached the end
         if (from_next == from_end)
             break;
     }

     rw_fatal (from_next >= from && from_next <= from_end, 0, __LINE__,
               "user-defined codecvt: internal inconsistency");

     rw_fatal (to_next >= to && (to_next <= to_end || !to_end), 0, __LINE__,
               "user-defined codecvt: internal inconsistency");

     return res;
 }


 // Returns the number of external characters at the start of
 // [from, from_end) that convert to `max' or fewer complete internal
 // characters, i.e., (from_next - from) as required of do_length().
 // The state argument is ignored since the encoding is stateless.
 int
 cformat::do_length (state_type&,
                     const extern_type *from,
                     const extern_type *from_end,
                     std::size_t        max) const
 {
     // dummy state for do_in()
     std::mbstate_t st;
     std::memset (&st, 0, sizeof st);

     // convert into a small scratch buffer, a chunk at a time, so that
     // do_in() does the parsing without any output having to be kept
     intern_type scratch [64];
     const std::size_t capacity = sizeof scratch / sizeof *scratch;

     const extern_type *next = from;

     while (max && next != from_end) {

         // never ask for more internal characters than are still wanted
         const std::size_t room = max < capacity ? max : capacity;

         const extern_type *from_next = next;
         intern_type       *to_next   = scratch;

         do_in (st, next, from_end, from_next,
                scratch, scratch + room, to_next);

         const std::size_t produced = std::size_t (to_next - scratch);

         next  = from_next;
         max  -= produced;

         // a chunk that wasn't filled means do_in() stopped because the
         // input ran out, ended mid-sequence, or was malformed
         if (produced < room)
             break;
     }

     return int (next - from);
 }

 /***********************************************************************/

 // Determines the size of the named file in bytes: the value ftell()
 // reports after seeking to the end of the file.  Returns -1 when the
 // file cannot be opened or positioned.
 static std::streamsize
 fsize (const char *fname)
 {
     std::FILE* const f = std::fopen (fname, "r");
     if (!f)
         return -1;

     // ftell() returns -1 on failure, the same value used for a failed seek
     const std::streamsize size =
         std::fseek (f, 0, SEEK_END) ? -1 : std::streamsize (std::ftell (f));

     // close the stream on every path, including after a failed seek
     std::fclose (f);

     return size;
 }

 /***********************************************************************/

 static void
 self_test ()
 {
     rw_info (0,  __FILE__, __LINE__,
              "user-defined codecvt facet -- self test");

     static const char* const result[] = {
         "ok", "partial", "error", "noconv"
     };

     // user-defined code conversion facet
     cformat fmt (1);

     // original array of internal characters and one to which to convert
     // an external representation back to (for comparison)
     cformat::intern_type intrn [2][256] = { { '\0' } };

     // array of external chars large enough to hold the internal array
     // each internal char converts to at most 4 external chars
     cformat::extern_type extrn [1024] = { '\0' };

     // fill internal array with chars from '\1' to '\377'
     for (std::size_t i = 0; i != sizeof intrn [0] - 1; ++i)
         intrn [0][i] = cformat::intern_type (i + 1);

     const cformat::intern_type *intrn_next_0 = 0;
           cformat::intern_type *intrn_next_1 = 0;

           cformat::extern_type *extrn_next   = 0;


     // dummy (state not used, conversion is stateless)
     std::mbstate_t st;
     std::memset (&st, 0, sizeof st);

     // convert internal to external representation, substituting
     // escape sequences for non-printable characters
     std::codecvt_base::result res;

     // convert array in internal representation to external representation
     res = fmt.out (st,
                    intrn [0], intrn [0] + sizeof intrn [0], intrn_next_0,
                    extrn, extrn + sizeof extrn, extrn_next);

     rw_assert (std::codecvt_base::ok == res, 0, __LINE__,
                "codecvt::out (); result == codecvt_base::ok, "
                "got codecvt_base::%s", result [res]);

     // assert that the external sequence is longer than the internal one
     rw_assert (extrn_next - extrn > intrn_next_0 - intrn [0], 0, __LINE__,
                "codecvt::out (); converted size %d, expected > %d",
                extrn_next - extrn, intrn_next_0 - intrn [0]);

     // convert external to internal representation, parsing
     // multi-char escape sequences into single chars

     const cformat::intern_type *next = extrn;

     intrn_next_1 = intrn [1];

     for (; next != extrn_next; ) {

         // allow only a small buffer space to exercise partial conversion
         std::size_t step = std::size_t (extrn_next - next);
         if (step > 12)
             step = 5 + step % 8;

         res = fmt.in (st,
                       next, next + step, next,
                       intrn_next_1, intrn [1] + sizeof intrn [1], intrn_next_1);

         if (std::codecvt_base::error == res)
             break;
     }

     // assert that entrire sequence converted ok
     rw_assert (std::codecvt_base::ok == res, 0, __LINE__,
                "codecvt::in (); result == codecvt_base::ok, got "
                "codecvt_base::%s at offset %d", result [res], next - extrn);

     rw_assert (intrn_next_1 == intrn [1] + sizeof intrn [1], 0, __LINE__,
                "codecvt::in (); to_next == %#p, got %#p",
                intrn [1] + sizeof intrn [1], intrn_next_1);

     rw_assert (0 == std::strcmp (intrn [0], intrn [1]), 0, __LINE__,
                "codecvt<>::out/in ()");
 }

 /***********************************************************************/

 // Asserts via rw_assert() that (strm).rdstate() equals (state), passing
 // the expected and the actual state along for the diagnostic message.
 // Both macro arguments are expanded twice, so they should be free of
 // side effects.
 #define RW_ASSERT_STATE(strm, state)                            \
         rw_assert ((strm).rdstate () == (state), 0, __LINE__,   \
                    "rdstate () == %{Is}, got %{Is}",            \
                    (state), (strm).rdstate ())

 // Reads the named file through an ifstream with no user-defined facet
 // imbued (i.e., in its external representation) and exercises
 //   - streambuf::showmanyc() and in_avail()
 //   - read() followed by sungetc() within the putback area
 //   - reading the file to the end and the resulting stream state
 // and finally checks that the file holds no raw characters outside
 // the range [' ', '~'].
 static void
 test_noconv (const char *fname)
 {
     rw_info (0, 0, __LINE__, "ifstream extraction without conversion");

     std::ifstream f (fname);

     // make sure file stream has been successfully opened
     RW_ASSERT_STATE (f, std::ios::goodbit);

     // gain public access to protected members
     struct pubbuf: std::streambuf {

         // working around an MSVC 6.0 bug (PR #26330)
         typedef std::streambuf Base;

         virtual std::streamsize showmanyc () {
             return Base::showmanyc ();
         }
         char* pubgptr () {
             return Base::gptr ();
         }
         char* pubegptr () {
             return Base::egptr ();
         }
     };

     // use static cast through void* to avoid using reinterpret_cast
     pubbuf *rdbuf = _RWSTD_STATIC_CAST (pubbuf*, (void*)f.rdbuf ());

     const std::streamsize filesize = rdbuf->showmanyc ();

     // exercise 27.5.2.4.3, p1: showmanyc() returns the "estimated"
     // size of the sequence (i.e., the file size in this case)
     rw_assert (filesize == fsize (fname), 0, __LINE__,
                "streambuf::showmanyc () == %ld, got %ld",
                fsize (fname), filesize);

     // exercise 27.5.2.2.3, p1: in_avail() is the number of buffered
     // characters if there are any, and showmanyc() otherwise
     const std::streamsize avail = f.rdbuf ()->in_avail ();

     const char *gptr  = rdbuf->pubgptr ();
     const char *egptr = rdbuf->pubegptr ();

     rw_assert (avail == (gptr < egptr ? egptr - gptr : fsize (fname)),
                0, __LINE__,
                "streambuf::in_avail () == %ld, got %ld",
                (gptr < egptr ? egptr - gptr : fsize (fname)), avail);

     // allocate buffer large enough to accommodate the converted
     // (i.e. internal) sequence, plus one element so that tmpbuf [n]
     // is valid even after a read that fills all bufsize characters
     const std::size_t bufsize = 0x10000;     // 64k should do it
     char *tmpbuf = new char [bufsize + 1];

     // fill with non-0 value to check for writes past the end
     // (see also Onyx incident 14033)
     std::memset (tmpbuf, '\x7f', bufsize + 1);

     // exercise putback area
     std::streamsize i;

 #ifndef _RWSTD_PBACK_SIZE
 #  define _RWSTD_PBACK_SIZE 1
 #endif   // _RWSTD_PBACK_SIZE

     // _RWSTD_PBACK_SIZE is the size of the putback area the library
     // was configured with; the macro expands to streamsize (N)
     for (i = 0; i != _RWSTD_PBACK_SIZE + 1; ++i) {
         // read a few characters, read must not append a '\0'
         std::streamsize n = f.read (tmpbuf, i).gcount ();

         // assert that read exactly `i' chars, buffer not null-terminated
         rw_assert (i == n && '\x7f' == tmpbuf [i], 0, __LINE__,
                    "ifstream::read (%#p, %ld) read %ld,"
                    "buffer terminated with '\\%03o'",
                    tmpbuf, i, n, tmpbuf [i]);

         // put back read characters; assert that they are the same
         // as those in the corresponding positions in the buffer
         for (std::streamsize j = 0; j != i; ++j) {
             std::ifstream::int_type c = f.rdbuf ()->sungetc ();

             typedef std::ifstream::traits_type Traits;

             rw_assert (Traits::to_int_type (tmpbuf [n - j - 1]) == c,
                        0, __LINE__,
                        "filebuf::sungetc() == '\\%03o', got '\\%03o",
                        tmpbuf [n - j - 1], c);
         }

         // re-read characters just put back
         char buf [_RWSTD_PBACK_SIZE + 1];
         std::memset (buf, '\x7f', sizeof buf);

         std::streamsize n2 = f.read (buf, i).gcount ();

         // assert that the requested number of chars were read in
         rw_assert (i == n2 && '\x7f' == buf [i], 0, __LINE__,
                    "ifstream::read (%#p, %ld) read %ld,"
                    "buffer terminated with '\\%03o'",
                    buf, i, n2, buf [i]);

         // assert that the read chars are those that were put back
         for (std::streamsize k = 0; k != i; ++k) {
             rw_assert (buf [k] == tmpbuf [k], 0, __LINE__,
                        "buffer mismatch at offset %ld: got '\\%03o', "
                        "expected '\\%03o'", k, buf [k], tmpbuf [k]);
             // put character back again so that it can be read back in
             f.rdbuf ()->sungetc ();
         }
     }

     // read file contents into buffer (apply no conversion)
     const std::streamsize n = f.read (tmpbuf, bufsize).gcount ();

     // 27.6.1.3, p28 - read() sets eofbit | failbit
     // if end-of-file occurs on the input sequence
     RW_ASSERT_STATE (f, std::ios::eofbit | std::ios::failbit);

     // assert that the entire file has been read
     rw_assert (n == filesize && '\x7f' == tmpbuf [n], 0, __LINE__,
                "ifstream::read (%#p, %ld) read %ld, expected %ld; "
                "buffer terminated with '\\%03o' [%s]",
                tmpbuf, bufsize, n, filesize, tmpbuf [n], fname);

     tmpbuf [n] = '\0';

     // assert that file contains no control characters: look for each
     // value outside [' ', '~'] in turn; 0 is skipped since strchr()
     // would match the terminator just stored
     const char *bad = 0;
     for (int c = 1; !bad && c != UCHAR_MAX + 1; ++c) {
         if (c >= ' ' && c <= '~')
             continue;
         bad = std::strchr (tmpbuf, char (c));
     }

     rw_assert (0 == bad, 0, __LINE__,
                "unescaped non-printable character '\\%03o' at offset %ld",
                bad ? int ((unsigned char)*bad) : 0,
                long (bad ? bad - tmpbuf : 0));

     delete[] tmpbuf;
 }

 /***********************************************************************/

 // Writes a short text file whose contents include an invalid hex
 // escape, then reads it back through a stream imbued with the cformat
 // facet and checks that extraction stops at the conversion error.
 static void
 test_error (const char *fname)
 {
     rw_info (0, 0, __LINE__, "ifstream extraction with a conversion error");

     // "\x20" converts to a space; the "\xzz" right after it is the
     // invalid hex sequence that triggers the conversion error
     const char text[] =
         "abcdefghijklmnopqrstuvwxyz\\x20\\xzzABCDEFGHIJKLMNOPQRSTUVWXYZ";

     // create the file (no conversion is applied on output)
     {
         std::ofstream sink (fname);
         sink << text;
     }

     // open it for reading; the facet gets imbued below
     std::ifstream reader (fname);

     // the facet; the non-zero argument keeps the locale from deleting it
     const cformat fmt (1);

     // the classic locale with cformat taking the place of
     // std::codecvt<char, char, mbstate_t>
     std::locale loc (std::locale::classic (), &fmt);

     // install it, holding on to the stream's previous locale
     loc = reader.imbue (loc);

     char readback [sizeof text * 4] = { 0 };

     // ask for "a..z", the space, the bad sequence and "AB"
     const long request = 26L + 4L;
     reader.read (readback, request);

     // fewer characters than requested have been extracted
     // so both eofbit and failbit must be set
     RW_ASSERT_STATE (reader, std::ios::eofbit | std::ios::failbit);

     // 'a' through 'z' and the space make 27 characters
     rw_assert (27 == reader.gcount (), 0, __LINE__,
                "ifstream::read () extracted %d, expected 27",
                reader.gcount ());

     typedef std::ifstream::traits_type Traits;

     const bool letters_match = 0 == Traits::compare (readback, text, 26);

     rw_assert (letters_match && ' ' == readback [26], 0, __LINE__,
                "ifstream::read () got \"%s\", expected \"%.26s \"",
                readback, text);

     // a second attempt must fail the same way without extracting anything
     reader.clear ();
     reader.read (readback, sizeof readback);

     RW_ASSERT_STATE (reader, std::ios::eofbit | std::ios::failbit);

     rw_assert (0 == reader.gcount (), 0, __LINE__,
                "ifstream::read () extracted %d, expected 0",
                reader.gcount ());

     // put the stream's original locale back
     reader.imbue (loc);
 }

 /***********************************************************************/

 // Exercises positioning on an input file stream that has the user-defined
 // code conversion facet (cformat) imbued.
 //
 //   fname    name of the file to read; it is expected to consist of
 //            "<offset> <char> " records as written by run_test(), where
 //            <offset> is the file position of its own first digit
 //   bufsize  size to pass to filebuf::pubsetbuf(), or std::size_t (-1)
 //            (the default) to leave the stream's buffer alone
 //
 // For each delimiter the function skips up to and including it, calls
 // either seekg (0, cur) or pubsync(), and then checks that tellg(), the
 // offset extracted from the file, and a seekg() back to the saved position
 // all agree with one another.
 static void
 test_seek (const char *fname,
            std::size_t bufsize = std::size_t (-1))
 {
     std::ifstream f (fname);

     // make sure stream has been successfully opened
     RW_ASSERT_STATE (f, std::ios::goodbit);

     // set buffer size if specified
     if (std::size_t (-1) != bufsize) {

         rw_info (0, 0, __LINE__,
                  "ifstream::seekg()/tellg() - %zu byte buffer", bufsize);

         RW_ASSERT_STATE (f, std::ios::goodbit);
         f.rdbuf ()->pubsetbuf (0, bufsize);
     }
     else {
         rw_info (0, 0, __LINE__,
                  "ifstream::seekg()/tellg() - default buffer size");
     }

     // user-defined code conversion facet
     const cformat fmt (1 /* prevent locale from deleting */);

     // create a locale by combining the classic locale and our UD facet
     // cformat; the facet will replace std::codecvt<char, char, mbstate_t>
     std::locale l (std::locale::classic (), &fmt);

     // imbue locale with formatting facet into streams and save previous
     l = f.imbue (l);

     // seek to the beginning of stream (safe)
     f.seekg (0);
     RW_ASSERT_STATE (f, std::ios::goodbit);

     // number of character values from '~' through UCHAR_MAX - 1
     const unsigned char max = UCHAR_MAX - '~';

     // use successive characters, starting with '~', as delimiters
     for (std::size_t n = 0; n != std::size_t (max - 1); ++n) {

         const char delim = char ('~' + n);

         // skip over chars until the terminating delim (and extract it)
         f.ignore (0x10000, std::fstream::traits_type::to_int_type (delim));

         // pass delim as unsigned char so that values that are negative
         // as plain char aren't sign-extended when formatted with %o,
         // and gcount() as long to match the %ld directive regardless
         // of the underlying type of std::streamsize
         rw_assert (f.good (), 0, __LINE__,
                    "istream::ignore (0x10000, '\\%03o'); "
                    "rdstate() = %{Is}, gcount() = %ld",
                    (unsigned char)delim, f.rdstate (), long (f.gcount ()));

         // alternate between exercising seekg() and pubsync()
         if (n % 2) {
             // seek inplace (offset must be 0 for MB encodings)
             f.seekg (0, std::ios::cur);
             RW_ASSERT_STATE (f, std::ios::goodbit);
         }
         else {
             // filebuf::pubsync() must return 0
             int syn = f.rdbuf ()->pubsync ();
             rw_assert (0 == syn, 0, __LINE__,
                        "filebuf::pubsync () == 0, got %d", syn);
         }

         // skip exactly one char forward (retrieve a space)
         char c = char ();
         f.get (c);
         RW_ASSERT_STATE (f, std::ios::goodbit);
         rw_assert (' ' == c, 0, __LINE__,
                    "istream::get(char_type) got '\\%03o', expected ' '",
                    (unsigned char)c);

         // get current file position
         const std::ifstream::pos_type pos = f.tellg ();
         RW_ASSERT_STATE (f, std::ios::goodbit);

         // extract offset - should be the same as pos
         long offset = 0;
         f >> offset;

         RW_ASSERT_STATE (f, std::ios::goodbit);

         rw_assert (long (pos) == offset, 0, __LINE__,
                    "ifstream::operator>>() expected %ld, got %ld",
                    long (pos), offset);

         // in_avail() must return a value > 0
         std::streamsize avail = f.rdbuf ()->in_avail ();
         rw_assert (avail > 0, 0, __LINE__,
                    "filebuf::in_avail() expected > 0, got %ld",
                    long (avail));

         // "rewind" stream to the beginning
         f.seekg (0);
         RW_ASSERT_STATE (f, std::ios::goodbit);

         // try seeking to the previous position
         f.seekg (pos);
         RW_ASSERT_STATE (f, std::ios::goodbit);
         rw_assert (f.tellg () == pos, 0, __LINE__,
                    "istream::seekg (%ld); tellg () returns %ld",
                    long (pos), long (f.tellg ()));

         // re-read offset - should be the same as file pos
         f >> offset;

         RW_ASSERT_STATE (f, std::ios::goodbit);

         rw_assert (long (pos) == offset, 0, __LINE__,
                    "ifstream::operator>>() expected %ld, got %ld",
                    long (pos), offset);
     }

     // ignore the rest of file, eofbit must be set
     f.ignore (0x10000);
     RW_ASSERT_STATE (f, std::ios::eofbit);

     // in_avail() must return 0
     const std::streamsize avail = f.rdbuf ()->in_avail ();
     rw_assert (0 == avail, 0, __LINE__,
                "filebuf::in_avail() expected 0, got %ld", long (avail));

     // imbue original locale (currently imbued locale
     // will be destroyed prior to the destruction of `f')
     f.imbue (l);
 }

 /***********************************************************************/

 // Test body passed to the test driver from main().
 //
 // Writes a temporary file through an ofstream imbued with the user-defined
 // code conversion facet, reads it back with and without conversion,
 // verifies that the converted contents match what was written, and then
 // runs the seek and conversion error tests on the same file.
 //
 // Returns 1 if no temporary file name could be obtained, 0 otherwise
 // (individual failures are reported via the assertions).
 static int
 run_test (int, char*[])
 {
     // self-test make sure facet works
     self_test ();

     // user-defined code conversion facet
     const cformat fmt (1 /* prevent locale from deleting */);

     // create a locale by combining the classic locale and our UD facet
     // cformat; the facet will replace std::codecvt<char, char, mbstate_t>
     std::locale l (std::locale::classic (), &fmt);

     const char *fname = rw_tmpnam (0);

     if (!fname)
         return 1;

     // will be populated with file offsets and escape sequences
     char buffer [4096] = { '\0' };

     std::size_t buflen = 0;

     // generate file contents using UD conversion
     if (1) {
         rw_info (0, 0, __LINE__,
                  "ofstream insertion with multibyte conversion");

         std::ofstream f (fname);

         // make sure file stream has been successfully opened
         RW_ASSERT_STATE (f, std::ios::goodbit);

         // imbue locale with formatting facet into stream
         f.imbue (l);

         // write one "<offset> <char> " record for every
         // character value in the range [1, UCHAR_MAX]
         for (std::size_t i = 1; i != UCHAR_MAX + 1U; ++i) {

             const std::ofstream::pos_type pos = f.tellp ();

             RW_ASSERT_STATE (f, std::ios::goodbit);

             buflen = std::strlen (buffer);

             // append the file offset followed by a (possibly escaped) char
             std::sprintf (buffer + buflen, "%ld %c ", long (pos), char (i));

             // write out the just appended portion of the buffer
             f << (buffer + buflen);

             RW_ASSERT_STATE (f, std::ios::goodbit);
         }

         buflen = std::strlen (buffer);

         // file contains the contents of buffer with non-printable
         // chars replaced with escape sequences (e.g., tabs with '\t', etc.)
     }


     // read contents of file w/o conversion
     test_noconv (fname);

     // read contents of file, apply conversion
     if (1) {
         rw_info (0, 0, __LINE__,
                  "ifstream extraction with multibyte conversion");

         std::ifstream f (fname);

         // make sure file stream has been successfully opened
         RW_ASSERT_STATE (f, std::ios::goodbit);

         // imbue locale with formatting facet into stream
         f.imbue (l);

         // allocate buffer large enough to accommodate the converted
         // (i.e. internal) sequence
         char tmpbuf [sizeof buffer];

         // read file contents into buffer, convert escape sequences
         // into the corresponding (perhaps unprintable) characters
         const std::streamsize n = f.read (tmpbuf, sizeof tmpbuf).gcount ();

         // 27.6.1.3, p28 - read() sets eofbit | failbit
         // if end-of-file occurs on the input sequence
         RW_ASSERT_STATE (f, std::ios::eofbit | std::ios::failbit);

         // compare the counts as values of the same (signed) type; buflen
         // is bounded by sizeof buffer and so always fits in a long
         const long nread    = long (n);
         const long expected = long (buflen);

         // size_t arguments are formatted with %zu, long ones with %ld
         rw_assert (nread == expected, 0, __LINE__,
                    "ifstream::read (%#p, %zu); read %ld, expected %zu",
                    tmpbuf, sizeof tmpbuf, nread, buflen);

         // assert that converted file contents are the same
         // as the originally generated buffer
         const long len = nread < expected ? nread : expected;
         for (long i = 0; i != len; ++i) {
             if (tmpbuf [i] != buffer [i]) {
                 // report only the first mismatch
                 rw_assert (0, 0, __LINE__,
                            "'\\%03o' == '\\%03o'; offset %ld",
                            (unsigned char)buffer [i],
                            (unsigned char)tmpbuf [i], i);
                 break;
             }
         }
     }


     // test with default buffer
     test_seek (fname);

     // retest with buffer of user-defined size, starting at 4096 and
     // decreasing in steps of 1024, 256, 16, and finally 1; a size of 0
     // is the last one exercised, after which n wraps around to
     // std::size_t (-1) and terminates the loop
     for (std::size_t n = 4096; n != std::size_t (-1);
          n -= 1024 < n ? 1024 : 256 < n ? 256 : 16 < n ? 16 : 1)
         test_seek (fname, n);


     // test with errors during conversion
     test_error (fname);

     // remove a temporary file
     std::remove (fname);

     return 0;
 }

 /**************************************************************************/

 // Program entry point: passes the command line and the test body
 // (run_test) to the test driver and returns the driver's result.
 int main (int argc, char *argv[])
 {
     const int status =
         rw_test (argc, argv, __FILE__,
                  "lib.filebuf",
                  0 /* no comment */,
                  run_test,
                  "", 0);

     // the driver's return value becomes the program's exit status
     return status;
 }