// util/collate.cpp - stdcxx - Git at Google

 /***************************************************************************
  *
  * collate.cpp
  *
  * $Id$
  *
  ***************************************************************************
  *
  * Licensed to the Apache Software  Foundation (ASF) under one or more
  * contributor  license agreements.  See  the NOTICE  file distributed
  * with  this  work  for  additional information  regarding  copyright
  * ownership.   The ASF  licenses this  file to  you under  the Apache
  * License, Version  2.0 (the  "License"); you may  not use  this file
  * except in  compliance with the License.   You may obtain  a copy of
  * the License at
  *
  * http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the  License is distributed on an  "AS IS" BASIS,
  * WITHOUT  WARRANTIES OR CONDITIONS  OF ANY  KIND, either  express or
  * implied.   See  the License  for  the  specific language  governing
  * permissions and limitations under the License.
  *
  * Copyright 2001-2006 Rogue Wave Software.
  *
  **************************************************************************/

 #include "def.h"          // for Def
 #include "diagnostic.h"   // for issue_diag()
 #include "path.h"         // for get_pathname()
 #include "scanner.h"      // for scanner

 #include <fstream>        // for ofstream
 #include <iostream>       // for cout
 #include <string>         // for string
 #include <vector>         // for vector

 #include <cassert>        // for assert()
 #include <cctype>         // for toupper()
 #include <cstdio>         // for sprintf()
 #include <cstdlib>        // for strtoul()
 #include <cstring>        // for strchr()


 // Fills wchar_utf8_to_ext_ from the multibyte character map: for every
 // entry, the UTF-8 encoding of the wide character value becomes the key
 // and the entry's own key string becomes the mapped value.
 // The table is built only once; a non-empty table is left untouched.
 void Def::
 create_wchar_utf8_table ()
 {
     if (wchar_utf8_to_ext_.empty ()) {

         const std::map<std::string, wchar_t>& mb_cmap =
             charmap_.get_mb_cmap ();

         std::map<std::string, wchar_t>::const_iterator pos =
             mb_cmap.begin ();

         while (pos != mb_cmap.end ()) {
             // UTF-8 form of the wide character value of this entry
             const std::string utf8 = utf8_encode (pos->second);

             wchar_utf8_to_ext_.insert (std::make_pair (utf8, pos->first));

             ++pos;
         }
     }
 }


 // Builds the weight array for one collation entry and attaches it to the
 // character (coll_map_) or collating element (ce_map_) named on the
 // left-hand side of the entry.
 //
 // entry.second holds the weight tokens given for the entry; each one is
 // handed to get_weight() for the corresponding level.  Levels for which
 // no token was given receive a single weight equal to the collating
 // value of the entry's own symbol.
 //
 // When the symbol cannot be resolved to a character or a collating
 // element, a single W_COLSYM warning is issued, warnings_occurred_ is
 // updated, and the weight array is released instead of being leaked.
 void Def::process_weights (collate_entry_t& entry)
 {
     Weights_t* const weights = new Weights_t[collate_out_.num_weights];

     // consume the weights explicitly listed for this entry
     token_list_t::iterator w_it = entry.second.begin ();

     std::size_t i = 0;
     for (; i < collate_out_.num_weights && w_it != entry.second.end ();
          ++i, ++w_it) {
         get_weight (*w_it, weights, i);
     }

     // resolve the symbol once; a name present in the wide character
     // map is looked up in coll_map_, any other name in ce_map_
     const std::map<std::string, wchar_t>& w_cmap = charmap_.get_w_cmap ();

     coll_map_iter coll_map_it = coll_map_.end ();
     ce_map_iter   ce_map_it   = ce_map_.end ();

     const w_cmap_iter wcmap_it = w_cmap.find (entry.first.name);
     if (wcmap_it != w_cmap.end ())
         coll_map_it = coll_map_.find (wcmap_it->second);
     else
         ce_map_it = ce_map_.find (entry.first.name);

     const bool is_char = coll_map_it != coll_map_.end ();
     const bool is_elem = ce_map_it != ce_map_.end ();

     if (!is_char && !is_elem) {
         warnings_occurred_ =
             issue_diag (W_COLSYM, false, &entry.first,
                         "symbolic name %s not defined as character or "
                         "collating-element, ignoring\n",
                         entry.first.name.c_str()) || warnings_occurred_;

         // nobody takes ownership of the array on this path
         delete[] weights;
         return;
     }

     // the remaining weights - see above - are given as weight the
     // collating value of the symbol at start of line
     for (; i < collate_out_.num_weights; ++i) {
         weights[i].size = 1;
         if (is_char)
             weights[i].weight[0] = coll_map_it->second.coll_val;
         else
             weights[i].weight[0] = ce_map_it->second.coll_val;
     }

     // hand the finished array over to the symbol's collation info
     if (is_char)
         coll_map_it->second.weights = weights;
     else
         ce_map_it->second.weights = weights;
 }


 // Translates the collation orders of a section into the per-level weight
 // types stored in collate_out_.weight_type (updating
 // collate_out_.num_weights) and returns them packed two bits per level,
 // with the first level in the most significant position.
 //
 // Weight type codes written here:
 //   0  forward
 //   1  backward
 //   2  forward,position (also a lone "position"); 0 is stored instead
 //      when no_position_ is set
 //   3  backward,position
 //
 // A section without any order gets a single forward level.  Anything
 // other than the combinations above is reported with E_COLORD; this
 // includes "<word>,position" where <word> is neither "forward" nor
 // "backward", which used to be accepted silently as backward,position.
 unsigned int Def::process_order_stmt (collate_section_t& section)
 {
     // number of orders in the section
     collate_out_.num_weights = section.order.size ();
     if (collate_out_.num_weights == 0) {
         collate_out_.num_weights = 1;
         collate_out_.weight_type[0] = 0;
     } else {
         collate_out_.num_weights = 0;
         token_list_t::iterator ord_it = section.order.begin ();
         for (; ord_it != section.order.end ();
              ++ord_it, ++collate_out_.num_weights) {
             if (ord_it->token == Scanner::tok_forward) {
                 collate_out_.weight_type[collate_out_.num_weights] = 0;
             } else if (ord_it->token == Scanner::tok_backward) {
                 collate_out_.weight_type[collate_out_.num_weights] = 1;
             } else if (ord_it->token == Scanner::tok_position) {
                 // this is equivalent to "forward,position"
                 if (no_position_)
                     collate_out_.weight_type[collate_out_.num_weights] = 0;
                 else
                     collate_out_.weight_type[collate_out_.num_weights] = 2;
             } else {
                 // anything else must be a "<direction>,position" pair
                 const std::string::size_type comma_pos =
                     ord_it->name.find (',');

                 if (comma_pos == std::string::npos) {
                     issue_diag (E_COLORD, true,
                                 &*ord_it, "unrecognized collating order\n");
                     continue;
                 }

                 const std::string first  = ord_it->name.substr (0, comma_pos);
                 const std::string second = ord_it->name.substr (comma_pos + 1);

                 const bool is_forward  = first == "forward";
                 const bool is_backward = first == "backward";

                 // verify a valid weight ordering: both halves must be
                 // recognized, not just the second one
                 if (second != "position" || !(is_forward || is_backward)) {
                     issue_diag (E_COLORD, true,
                                 &*ord_it, "unrecognized collating order\n");
                     break;
                 }

                 if (is_forward) {
                     if (no_position_)
                         collate_out_.weight_type
                             [collate_out_.num_weights] = 0;
                     else
                         collate_out_.weight_type
                             [collate_out_.num_weights] = 2;
                 }
                 else
                     collate_out_.weight_type[collate_out_.num_weights] = 3;
             }
         }
     }

     // build the order value; each level occupies two bits
     unsigned int order = 0;
     for (int i = 0; i < collate_out_.num_weights; ++i) {
         order <<= 2;
         order |= collate_out_.weight_type[i];
     }

     return order;
 }


 // decimally increment the symbolic name, turning something like
 // <U1234> into <U1235>, or <jis234> to <jis235>
 //
 // The first run of decimal digits in the name is parsed, incremented by
 // one, and written back zero-padded to at least its original width, so
 // that <U0009> becomes <U0010> rather than <U10>.  The text before and
 // after the digits is preserved.  A name that contains no decimal digit
 // is returned unchanged.
 static std::string
 dec_increment (const std::string &sym)
 {
     const char *pdig = sym.c_str ();

     // find the first digit but never run past the terminating NUL; the
     // cast keeps isdigit() well-defined for negative char values
     for (; *pdig && !std::isdigit (static_cast<unsigned char>(*pdig)); ++pdig);

     if (!*pdig)
         return sym;

     char *end;

     const unsigned long sym_val = std::strtoul (pdig, &end, 10);

     char numstr [64];
     std::sprintf (numstr, "%lu", sym_val + 1);

     // width of the original digit field and of the new number
     const std::string::size_type width  = end - pdig;
     const std::string::size_type numlen = std::strlen (numstr);

     std::string next_sym = sym.substr (0, pdig - sym.c_str ());

     // restore the leading zeros dropped by the numeric round trip
     if (numlen < width)
         next_sym.append (width - numlen, '0');

     next_sym.append (numstr);
     next_sym.append (end);

     return next_sym;
 }


 // hexadecimally increment the symbolic name, turning something like
 // <U00FF> into <U0100>
 //
 // The first run of hexadecimal digits in the name is parsed, incremented
 // by one, and written back zero-padded to at least its original width.
 // The new digits are upper-case when the original field contained an
 // upper-case letter and lower-case otherwise (the historical default).
 // The text before and after the digits is preserved.  A name that
 // contains no hexadecimal digit is returned unchanged.
 static std::string
 hex_increment (const std::string& sym)
 {
     const char *pdig = sym.c_str ();

     // find the first hex digit but never run past the terminating NUL;
     // the cast keeps isxdigit() well-defined for negative char values
     for (; *pdig && !std::isxdigit (static_cast<unsigned char>(*pdig)); ++pdig);

     if (!*pdig)
         return sym;

     char *end;

     const unsigned long sym_val = std::strtoul (pdig, &end, 16);

     // follow the letter case used by the original digit field
     bool upper = false;
     for (const char *p = pdig; p != end; ++p) {
         if (std::isupper (static_cast<unsigned char>(*p)))
             upper = true;
     }

     char numstr [64];
     if (upper)
         std::sprintf (numstr, "%lX", sym_val + 1);
     else
         std::sprintf (numstr, "%lx", sym_val + 1);

     // width of the original digit field and of the new number
     const std::string::size_type width  = end - pdig;
     const std::string::size_type numlen = std::strlen (numstr);

     std::string next_sym = sym.substr (0, pdig - sym.c_str ());

     // restore the leading zeros dropped by the numeric round trip
     if (numlen < width)
         next_sym.append (width - numlen, '0');

     next_sym.append (numstr);
     next_sym.append (end);

     return next_sym;
 }


 // scanning ahead and adding all symbols to the collating symbols map
 void Def::preprocess_collation_definitions()
 {
     // start with collation values from 2
     unsigned int coll_value = 2;

     // previous_elm and next_elm are used for processing ellipsis.
     std::string prev_elm;
     std::string next_elm;

     static unsigned int max_orders = 0;

     token_list_t::iterator cs_it = sym_list_.begin ();
     for (; cs_it != sym_list_.end (); ++cs_it) {
         cs_map_iter csm_it = cs_map_.find (cs_it->name);
         if (csm_it != cs_map_.end ())
             csm_it->second = coll_value++;
     }

     std::list<collate_section_t>::iterator s_it = section_list_.begin ();

     if (s_it != section_list_.end()) {
         // get maximum weights count
         max_orders = s_it->order.size ();
     }

     for (; s_it != section_list_.end (); ++s_it) {
         if (max_orders != s_it->order.size ())
             issue_diag (E_COLNUM, true, &*s_it->order.begin (),
                         "number of collation orders "
                         "different for this section\n");
     }

     const std::map<std::string, wchar_t>& w_cmap = charmap_.get_w_cmap ();

     // preprocess sections' collation definitions
     for (s_it = section_list_.begin ();
          s_it != section_list_.end (); ++s_it) {

         // process the order statement and get the weight
         unsigned int order = process_order_stmt (*s_it);

         std::list<collate_entry_t>::iterator e_it =
             s_it->entries.begin ();

         for (; e_it != s_it->entries.end (); ++e_it) {
             switch (e_it->first.token) {
             case Scanner::tok_sym_name:
                 // store the previous name
                 prev_elm = e_it->first.name;

                 // process each of the collation definitions
                 process_collation_definition (false, *e_it,
                                               coll_value++, order);

                 break;

             case Scanner::tok_abs_ellipsis:   // "<FOO>...<BAR>"
             case Scanner::tok_hex_ellipsis:   // "<FOO>..<BAR>"
             case Scanner::tok_dec_ellipsis:   // "<FOO>....<BAR>"
             {

                 if (++e_it != s_it->entries.end ())
                     next_elm = e_it->first.name;
                 else {
                     issue_diag (E_SYNTAX, true, &e_it->first,
                                 "unexpected end of collation section while"
                                 " processing ellipsis during scan_ahead\n");
                 }

                 if (e_it->first.token == Scanner::tok_abs_ellipsis) {
                     wchar_t w_cmap_cur_val
                         = w_cmap.find(prev_elm)->second;
                     wchar_t w_cmap_end_val
                         = w_cmap.find(next_elm)->second;

                     // the first value has already been
                     // added so don't add it again
                     w_cmap_cur_val = charmap_.increment_wchar (w_cmap_cur_val);
                     while (w_cmap_cur_val != w_cmap_end_val) {
                         // process iteration
                         collate_info_t ci = {UINT_MAX, UINT_MAX, 0, 0};
                         ci.coll_val = coll_value++;
                         ci.order = order;

                         coll_map_.insert (std::make_pair (w_cmap_cur_val, ci));
                         w_cmap_cur_val =
                             charmap_.increment_wchar (w_cmap_cur_val);
                     }

                     // add last element "next_elm" to array
                     collate_info_t ci = {UINT_MAX, UINT_MAX, 0, 0};
                     ci.coll_val = coll_value++;
                     ci.order = order;
                     coll_map_.insert (std::make_pair (w_cmap_cur_val, ci));

                 }
                 else {
                     // we are incrementing the symbolic names
                     std::string next_name = prev_elm;
                     do {
                         if (e_it->first.token == Scanner::tok_hex_ellipsis)
                             next_name = hex_increment (next_name);
                         else
                             next_name = dec_increment (next_name);

                         w_cmap_iter it = w_cmap.find (next_name);
                         if (it != w_cmap.end()) {
                             // process iteration
                             collate_info_t ci = {UINT_MAX, UINT_MAX, 0, 0};
                             ci.coll_val = coll_value++;
                             ci.order = order;
                             coll_map_.insert (
                                 std::make_pair (it->second, ci));
                         }
                     } while (next_name != next_elm);
                 }

                 prev_elm = next_elm;
                 break;
             }

             case Scanner::tok_dbl_ellipsis:
                     issue_diag (W_NOTSUP, true, &e_it->first,
                                 "ellipsis not supported"
                                 " processing ellipsis during scan_ahead\n");
             default:
                 break;
             }
         }
     }
 }


 void Def::process_collation_definition ( bool do_weights,
                                          collate_entry_t& entry,
                                          unsigned int coll_value,
                                          unsigned int order)
 {
     const std::map<std::string, wchar_t>& w_cmap = charmap_.get_w_cmap ();

     // iterators
     w_cmap_iter w_cmap_pos =
         w_cmap.find(entry.first.name);
     ce_map_iter ce_map_pos =
         ce_map_.find(entry.first.name);

     // look up the symbolic name in the wide character map
     if (w_cmap_pos != w_cmap.end()) {

         wchar_t wval = w_cmap_pos->second;
         coll_map_iter coll_map_pos = coll_map_.find (wval);

         coll_map_pos->second.coll_val = coll_value;
         coll_map_pos->second.order = order;

         // process the weights
         if (do_weights) {
             unsigned int offset = next_offset_++;
             coll_map_pos->second.offset = offset;
             off_mapr_.insert(std::make_pair(offset, entry.first.name));

             process_weights (entry);
         }

     }
     else if (ce_map_pos != ce_map_.end()) {

         ce_map_pos->second.coll_val = coll_value;
         ce_map_pos->second.order    = order;

         // process the weights
         if (do_weights) {
             unsigned int offset = next_offset_++;
             ce_map_pos->second.offset = offset;
             off_mapr_.insert(std::make_pair(offset, entry.first.name));

             process_weights (entry);
         }

     }
     else if (cs_map_.find(entry.first.name) != cs_map_.end()) {

         cs_map_.find(entry.first.name)->second = coll_value;

     }
     else {
         // the symbol is missing from all three maps;
         // add it in cs map as "sym <-> collating_value" pair
         cs_map_.insert (std::make_pair(entry.first.name, coll_value));

         warnings_occurred_ =
             issue_diag (W_COLSYM, false, &entry.first,
                         "unknown symbol name %s found in "
                         "LC_COLLATE definition\n",
                         entry.first.name.c_str()) || warnings_occurred_;
     }
 }

 // Second pass over one collate section: assigns collating values,
 // offsets and weights to every entry of the section.
 //
 // section     the section to process; its order is evaluated with
 //             process_order_stmt()
 // coll_value  next collating value to hand out; advanced for every
 //             value consumed so that the caller can continue with the
 //             following section
 //
 // Plain symbolic names go through process_collation_definition().  An
 // ellipsis entry expands the range between the preceding symbol and the
 // symbol on the following entry via add_to_coll().  The UNDEFINED entry
 // records the section order in undef_char_info_ and defers to
 // add_missing_values().
 void Def::process_order(collate_section_t& section,
                         unsigned int& coll_value)
 {
     // get the orders
     const unsigned int section_order = process_order_stmt (section);

     // previous_elm and next_elm are used for processing ellipsis.
     std::string prev_elm;
     std::string next_elm;

     const std::map<std::string, wchar_t>& w_cmap = charmap_.get_w_cmap ();

     // iterate thru section's entries
     std::list<collate_entry_t>::iterator e_it = section.entries.begin ();
     for (; e_it != section.entries.end (); ++e_it) {
         if (e_it->first.token == Scanner::tok_sym_name) {
             prev_elm = e_it->first.name;
             process_collation_definition (true, *e_it,
                                           coll_value++, section_order);

         }
         else if (   e_it->first.token == Scanner::tok_abs_ellipsis
                  || e_it->first.token == Scanner::tok_dec_ellipsis
                  || e_it->first.token == Scanner::tok_hex_ellipsis) {

             // remember the ellipsis entry: the iterator is about to
             // move on to the entry that names the end of the range
             const std::list<collate_entry_t>::iterator ellipsis_it = e_it;

             // process line with ellipsis
             Weights_t* weights_template =
                 new Weights_t[collate_out_.num_weights];
             std::vector<bool> ordinal_weights;

             token_list_t::iterator w_it = e_it->second.begin ();
             for (int i = 0; i < collate_out_.num_weights
                      && w_it != e_it->second.end (); ++i, ++w_it) {
                 ordinal_weights.push_back (
                     get_weight (*w_it,weights_template, i));
             }
             // levels without an explicit weight use the ordinal value
             while (ordinal_weights.size() < collate_out_.num_weights)
                 ordinal_weights.push_back (true);

             // next line
             if (++e_it == section.entries.end ()) {
                 // nothing follows the ellipsis: release the template
                 // first, report against the ellipsis entry (the end
                 // iterator must not be dereferenced) and stop
                 delete[] weights_template;

                 issue_diag (E_SYNTAX, true, &ellipsis_it->first,
                             "unexpected end of collation section while"
                             " processing ellipsis\n");
                 break;
             }

             next_elm = e_it->first.name;

             // NOTE(review): e_it has already been advanced, so the token
             // tested here and in the loop below belongs to the entry that
             // follows the ellipsis, not to the ellipsis entry
             // (ellipsis_it).  preprocess_collation_definitions() uses the
             // same pattern and hands out collating values in step with
             // this function, so the two must be changed together.
             if (e_it->first.token == Scanner::tok_abs_ellipsis) {
                 wchar_t w_cmap_cur_val
                     = w_cmap.find(prev_elm)->second;
                 wchar_t w_cmap_end_val
                     = w_cmap.find(next_elm)->second;

                 // the first value has already been added so don't add it again
                 w_cmap_cur_val = charmap_.increment_wchar (w_cmap_cur_val);

                 while (w_cmap_cur_val != w_cmap_end_val) {

                     add_to_coll (w_cmap_cur_val, weights_template,
                                  coll_value++, ordinal_weights, false);

                     w_cmap_cur_val = charmap_.increment_wchar (w_cmap_cur_val);
                 }

                 // add the end element to the collation array.
                 add_to_coll (w_cmap_end_val, weights_template, coll_value++,
                              ordinal_weights, false);

             } else {

                 // we are incrementing the symbolic names; names that
                 // are not in the character map consume no value
                 std::string next_name = prev_elm;
                 do {
                     if (e_it->first.token == Scanner::tok_hex_ellipsis)
                         next_name = hex_increment (next_name);
                     else
                         next_name = dec_increment (next_name);

                     w_cmap_iter it = w_cmap.find (next_name);
                     if (it != w_cmap.end()) {
                         add_to_coll (it->second, weights_template,
                                      coll_value++, ordinal_weights, false);
                     }
                 } while (next_name != next_elm);
             }

             // the end of this range is where a following ellipsis starts
             // (same bookkeeping as preprocess_collation_definitions)
             prev_elm = next_elm;

             delete[] weights_template;

         } else if (e_it->first.token == Scanner::tok_undefined) {

             // add all characters that were not explicitly given a collation
             // value in increasing order based on their encoded values
             undefined_keyword_found_ = true;
             collate_out_.undefined_optimization = true;
             undef_char_info_.order = section_order;
             Weights_t* weights_template
                 = new Weights_t[collate_out_.num_weights];
             std::vector<bool> ordinal_weights;

             token_list_t::iterator w_it = e_it->second.begin ();
             for (int i = 0; i < collate_out_.num_weights
                      && w_it != e_it->second.end(); ++i, ++w_it) {

                 // an ellipsis among the weights turns the undefined
                 // optimization off
                 if (w_it->token == Scanner::tok_abs_ellipsis)
                     collate_out_.undefined_optimization = false;

                 ordinal_weights.push_back (
                     !get_weight (*w_it, weights_template, i));
             }

             while (ordinal_weights.size() < collate_out_.num_weights)
                 ordinal_weights.push_back (false);

             add_missing_values (ordinal_weights, weights_template,
                                 coll_value, false);

             delete[] weights_template;
         }
     }
 }

 void Def::add_missing_values (const std::vector<bool> &ordinal_weights,
                               const Weights_t* weights_template,
                               unsigned int &coll_value, bool give_warning)
 {
     const std::map<std::string, wchar_t>& w_cmap = charmap_.get_w_cmap ();

     // we want to print a warning message once if there are characters that
     // were not given collation values.
     bool warning_issued = false;

     symnames_list_iter symnames_it;
     for (symnames_it = charmap_.get_symnames_list ().begin ();
          symnames_it != charmap_.get_symnames_list ().end ();
          ++symnames_it) {

         std::map<std::string, wchar_t>::const_iterator w_cmap_it;
         if ((w_cmap_it = w_cmap.find(*symnames_it)) != w_cmap.end()) {
             wchar_t wchar_val = (*w_cmap_it).second;

             coll_map_iter coll_map_it;
             if ((coll_map_it = coll_map_.find(wchar_val)) != coll_map_.end()) {
                 if (coll_map_it->second.offset == UINT_MAX) {
                     if (give_warning && !warning_issued) {
                         warning_issued = true;
                         warnings_occurred_ =
                             issue_diag (W_MISSING, false,
                                         0, "some characters in the codeset "
                                         "were not explicitly given a "
                                         "collation value\n")
                             || warnings_occurred_;
                     }
                     if (!collate_out_.undefined_optimization) {
                         add_to_coll (wchar_val, weights_template,
                                      coll_value++, ordinal_weights, true);
                     }
                 }
             }
         }
     }
     if (collate_out_.undefined_optimization){
         collate_out_.undefined_weight_idx = next_offset_++;
         undef_char_info_.offset = collate_out_.undefined_weight_idx;
         undef_char_info_.coll_val = coll_value++;
         // initialize the weight template with the undefined collation info
         Weights_t* weights = new Weights_t[collate_out_.num_weights];

         if (0 != weights_template) {
             for (int k = 0; k < collate_out_.num_weights; ++k) {
                 weights[k].size = weights_template[k].size;
                 for (int j = 0; j < 256; ++j)
                     weights[k].weight[j] = weights_template[k].weight[j];
             }
         }


         std::vector<bool>::const_iterator it;

         // now go through the vector that tells us if a particular
         // weight is based on the ordinal position of the element
         // and place the correct ordinal value in the weight vector
         // if it is.
         int i = 0;
         for (it = ordinal_weights.begin();
              it != ordinal_weights.end(); ++it, ++i)
         {
             // FIXME: this may not be correct
             if (!*it){
                 weights[i].size = 1;
                 weights[i].weight[0] = undef_char_info_.coll_val;
             }
         }
         undef_char_info_.weights = weights;


     }
 }

 void Def::add_to_coll (const wchar_t            w_cmap_cur_val,
                        const Weights_t*         weights_template,
                        const unsigned int       coll_value,
                        const std::vector<bool>& ordinal_weights,
                        bool                     undefined_value)
 {
     Weights_t* weights = new Weights_t[collate_out_.num_weights];

     if (0 != weights_template) {
         for (int k = 0; k < collate_out_.num_weights; ++k) {
             weights[k].size = weights_template[k].size;
             for (int j = 0; j < 256; ++j)
                 weights[k].weight[j] = weights_template[k].weight[j];
         }
     }

     // get the symbolic name from the value
     std::string name
         = charmap_.get_rw_cmap().find (w_cmap_cur_val)->second;
     coll_map_.find (w_cmap_cur_val)->second.coll_val = coll_value;

     unsigned int offset;
     if (undefined_value && collate_out_.undefined_optimization)
         offset = next_offset_;
     else
         offset = next_offset_++;

     const std::map<std::string, wchar_t>& w_cmap = charmap_.get_w_cmap ();

     w_cmap_iter w_cmap_pos = w_cmap.find(name);
     coll_map_iter coll_it = coll_map_.find (w_cmap_pos->second);
     coll_it->second.offset = offset;

     off_mapr_.insert(std::make_pair(offset, name));

     std::vector<bool>::const_iterator it;

     // now go through the vector that tells us if a particular
     // weight is based on the ordinal position of the element
     // and place the correct ordinal value in the weight vector
     // if it is.
     int i = 0;
     for (it = ordinal_weights.begin();
          it != ordinal_weights.end(); ++it, ++i)
     {
         if (*it){
             weights[i].size = 1;
             weights[i].weight[0] = coll_value;
         }
     }
     coll_it->second.weights = weights;
 }

 // if undefined optimization is on then only those characters that have
 // been defined should go into the valid set, otherwise all characters go
 // into the set.
 void Def::gen_valid_coll_mb_set ()
 {
     if (!valid_coll_mb_set_.empty())
         return;

     for (coll_map_iter coll_it = coll_map_.begin();
          coll_it != coll_map_.end(); ++coll_it) {
         if (coll_it->second.offset != UINT_MAX
             || !collate_out_.undefined_optimization){
             rmb_cmap_iter rn_cmap_it
                 = charmap_.get_rmb_cmap().find (coll_it->first);
             std::string valid = rn_cmap_it->second.substr
                 (0, rn_cmap_it->second.size() - 1);

             while (valid.size() > 0){
                 valid_coll_mb_set_.insert (valid);
                 valid = valid.substr(0, valid.size() - 1);
             }
         }
     }
 }


 // In processing the collate section of the locale definition file
 // we will filter out the characters that are not present in the
 // charmap file; these characters when encountered on the left-hand side
 // of a weight definition for a symbolic name will be left in place
 // and used as a mere collating symbol for the rest of the parsing -
 // i.e. its weights ignored and assigned the weight corresponding to
 // its position in the collate section.
 void Def::process_collate()
 {
     issue_diag (I_STAGE, false, 0, "processing LC_COLLATE section\n");

     // update flags
     collate_def_found_ = true;
     // initialization
     init_coll_map ();

     // first preprocess the collate section
     preprocess_collate ();

     const std::map<std::string, wchar_t>& w_cmap = charmap_.get_w_cmap ();

     // process the collating symbols list
     token_list_t::iterator cs_it = cs_list_.begin ();
     for (;cs_it != cs_list_.end (); ++cs_it) {
         if (w_cmap.find (cs_it->name) !=
             w_cmap.end())
             issue_diag (E_SYNTAX, true, &*cs_it,
                         "collating-symbol %s found in character map\n",
                         cs_it->name.c_str ());
         else if (ce_map_.find (cs_it->name) != ce_map_.end())
             issue_diag (E_SYNTAX, true, &*cs_it,
                         "collating-symbol %s has already been defined as "
                         "a collating-element\n", cs_it->name.c_str ());

         cs_map_.insert (std::make_pair (cs_it->name, 0));
     }

     // process the collating elements list
     std::list<collate_elem_t>::iterator ce_it =
         ce_list_.begin ();
     for (; ce_it != ce_list_.end (); ++ce_it) {
         if (w_cmap.find (ce_it->first.name) !=
             w_cmap.end()) {
             issue_diag (E_SYNTAX, true, &ce_it->first,
                         "collating element %s found in charmap\n",
                         ce_it->first.name.c_str());
         }

         // collating element info to be stored
         ce_info_t ce_tmp;
         ce_tmp.offset   = UINT_MAX;
         ce_tmp.coll_val = UINT_MAX;
         ce_tmp.order    = 0;
         ce_tmp.weights  = 0;

         // array of symbolic names
         std::vector<std::string> ce_sym_array;

         // get the definition of this collating element
         token_list_t& ce_def_list = ce_it->second;
         token_list_t::iterator ce_def_it = ce_def_list.begin ();
         bool invalid = false;

         if (ce_def_it->token == Scanner::tok_string) {
             if ((ce_tmp.ce_wstr = convert_wstring (*ce_def_it)).empty())
                 invalid = true;
         } else if (ce_def_it->token == Scanner::tok_sym_name) {
             for (; ce_def_it != ce_def_list.end (); ++ce_def_it)
                 ce_sym_array.push_back (ce_def_it->name);
             if ((ce_tmp.ce_wstr = convert_wstring (ce_sym_array)).empty())
                 invalid = true;
         } else {
             issue_diag (E_SYNTAX, true, &*ce_def_it,
                         "illegal collating-element expression: %s\n",
                         ce_def_it->name.c_str ());
         }
         if (invalid)
             issue_diag (W_SYNTAX, false, &*ce_def_it,
                         "character in collating element definition "
                         "not found in character map\n");

         if (!invalid) {
             ce_map_.insert (std::make_pair(ce_it->first.name, ce_tmp));

             std::string ce_str;
             std::string ce_utf8_wstr;
             for (unsigned int i = 0; i < ce_tmp.ce_wstr.size(); ++i){
                 ce_str += convert_to_ext(ce_tmp.ce_wstr[i]);
                 ce_utf8_wstr += utf8_encode (ce_tmp.ce_wstr[i]);
             }
             ce_sym_map_.insert (std::make_pair(ce_str, ce_it->first.name));
             ce_wsym_map_.insert (std::make_pair (ce_utf8_wstr,
                                                  ce_it->first.name));
         }
     }

     // preprocess the collation definitions and make known all
     // the symbolic names available
     preprocess_collation_definitions();

     // collation values 0 and 1 are reserved
     unsigned int coll_value = 2;

     // FIXME: assign values to the collating-elements AGAIN
     cs_it = sym_list_.begin ();
     for (; cs_it != sym_list_.end (); ++cs_it) {
         cs_map_iter csm_it = cs_map_.find (cs_it->name);
         if (csm_it != cs_map_.end ())
             csm_it->second = coll_value++;
     }

     // process the sections
     std::list<collate_section_t>::iterator sect_it =
         section_list_.begin ();
     for (; sect_it != section_list_.end (); ++sect_it)
         process_order (*sect_it, coll_value);

     // check to make sure that all the elements in the codeset were added
     if (!undefined_keyword_found_) {
         std::vector<bool> ordinal_weights;
         for (int i = 0; i < collate_out_.num_weights; ++i)
             ordinal_weights.push_back (false);
         collate_out_.undefined_optimization = true;
         add_missing_values (ordinal_weights, 0, coll_value, true);
     }

 }


 // Builds the narrow character lookup table for the byte sequence
 // prefix `charp' and stores it in char_offs_ under the key `tab_num'.
 //
 // For each of the 256 possible byte values the prefix is extended by
 // that byte.  If the result is found in the multibyte character map,
 // the slot receives the collation offset of the mapped character.  If
 // it is not, but the extended sequence is a member of
 // valid_coll_mb_set_, a new table number is allocated, stored in the
 // slot with the high bit set, and the table for the longer prefix is
 // generated recursively.  All other slots keep the value UINT_MAX.
 void Def::gen_n_to_w_coll_tables (const std::string &charp,
                                   unsigned int tab_num)
 {
     gen_valid_coll_mb_set();

     // begin with a table in which no byte value has an entry
     offset_tab_t tab;
     tab.first_offset = -1;
     for (unsigned int slot = 0; slot <= UCHAR_MAX; ++slot)
         tab.off[slot] = UINT_MAX;

     for (unsigned int byte = 0; byte <= UCHAR_MAX; ++byte) {
         const unsigned char cur_char = (unsigned char)byte;

         // the prefix extended by the byte under consideration
         std::string mb_char (charp);
         mb_char += (char)cur_char;

         // becomes true once this byte has been given a table entry
         bool entry_made = false;

         mb_cmap_iter n_cmap_it = charmap_.get_mb_cmap ().find (mb_char);

         if (n_cmap_it == charmap_.get_mb_cmap ().end ()) {
             // not a character by itself; if the sequence is listed in
             // valid_coll_mb_set_ it is continued in a table of its own
             if (   valid_coll_mb_set_.find (mb_char)
                 != valid_coll_mb_set_.end ()) {
                 ++next_tab_num_;
                 tab.off[cur_char] = next_tab_num_ | 0x80000000;
                 gen_n_to_w_coll_tables (mb_char, next_tab_num_);
                 entry_made = true;
             }
             // otherwise the slot simply keeps its initial UINT_MAX
         }
         else {
             coll_map_iter coll_map_it = coll_map_.find (n_cmap_it->second);

             // a character whose offset is UINT_MAX is entered only
             // when the undefined optimization is not in effect
             if (   !collate_out_.undefined_optimization
                 || coll_map_it->second.offset != UINT_MAX) {
                 tab.off[cur_char] = coll_map_it->second.offset;
                 entry_made = true;
             }
         }

         // remember the lowest byte value that has an entry
         if (entry_made && tab.first_offset == -1)
             tab.first_offset = cur_char;
     }

     char_offs_.insert (std::make_pair (tab_num, tab));
 }


 // Builds the wide character lookup table for the byte sequence prefix
 // `charp' and stores it in w_to_n_coll_ under the key `tab_num'.
 //
 // For each of the 256 possible byte values the prefix is extended by
 // that byte and looked up in wchar_utf8_to_ext_.  When it is found,
 // the mapped value is looked up in the multibyte character map and,
 // if present there, the slot receives the collation offset of the
 // resulting character.  When it is not found but the sequence is a
 // member of valid_coll_wchar_set_, a new table number is allocated,
 // stored in the slot with the high bit set, and the table for the
 // longer prefix is generated recursively.  All other slots keep the
 // value UINT_MAX.
 void Def::gen_w_to_n_coll_tables (const std::string &charp,
                                   unsigned int tab_num)
 {
     // initialize a table used in ctype and collate
     create_wchar_utf8_table();
     gen_valid_coll_wchar_set();

     // begin with a table in which no byte value has an entry
     offset_tab_t tab;
     tab.first_offset = -1;
     for (unsigned int slot = 0; slot <= UCHAR_MAX; ++slot)
         tab.off[slot] = UINT_MAX;

     for (unsigned int byte = 0; byte <= UCHAR_MAX; ++byte) {
         const unsigned char cur_char = (unsigned char)byte;

         // the prefix extended by the byte under consideration
         std::string mb_char (charp);
         mb_char += (char)cur_char;

         // becomes true once this byte has been given a table entry
         bool entry_made = false;

         wchar_utf8_iter wu_it = wchar_utf8_to_ext_.find (mb_char);

         if (wu_it == wchar_utf8_to_ext_.end ()) {
             // not a known sequence by itself; if it is listed in
             // valid_coll_wchar_set_ it is continued in its own table
             if (   valid_coll_wchar_set_.find (mb_char)
                 != valid_coll_wchar_set_.end ()) {
                 ++next_wchar_coll_tab_num_;
                 tab.off[cur_char] = next_wchar_coll_tab_num_ | 0x80000000;
                 gen_w_to_n_coll_tables (mb_char,
                                         next_wchar_coll_tab_num_);
                 entry_made = true;
             }
             // otherwise the slot simply keeps its initial UINT_MAX
         }
         else {
             mb_cmap_iter n_cmap_it =
                 charmap_.get_mb_cmap ().find (wu_it->second);

             if (n_cmap_it != charmap_.get_mb_cmap ().end ()) {
                 coll_map_iter coll_map_it =
                     coll_map_.find (n_cmap_it->second);

                 // a character whose offset is UINT_MAX is entered only
                 // when the undefined optimization is not in effect
                 if (   !collate_out_.undefined_optimization
                     || coll_map_it->second.offset != UINT_MAX) {
                     tab.off[cur_char] = coll_map_it->second.offset;
                     entry_made = true;
                 }
             }
         }

         // remember the lowest byte value that has an entry
         if (entry_made && tab.first_offset == -1)
             tab.first_offset = cur_char;
     }

     w_to_n_coll_.insert (std::make_pair (tab_num, tab));
 }


 void Def::gen_n_ce_tables (const std::set<std::string>ce_elms,
                            unsigned int idx, unsigned int tab_num)
 {
     if (ce_elms.size() > 0) {
         ce_offset_tab_t tab;
         for (unsigned int k = 0; k <= UCHAR_MAX; ++k) {
             tab.off[k] = UINT_MAX;
         }
         tab.first_offset = -1;
         tab.last_offset = 0;
         ce_sym_map_iter ce_sym_map_it;
         ce_map_iter ce_map_it;

         std::set<std::string>::const_iterator ce_elms_it;
         std::set<std::string> next_elms;

         for (unsigned int i = 0; i <= UCHAR_MAX; ++i) {
             next_elms.clear();
             for (ce_elms_it = ce_elms.begin(); ce_elms_it != ce_elms.end();
                  ce_elms_it ++) {

                 if ((unsigned char)i == (unsigned char)((*ce_elms_it)[idx])) {
                     if ((*ce_elms_it).size() == idx + 1) {
                         if ((ce_sym_map_it = ce_sym_map_.find (*ce_elms_it))
                             != ce_sym_map_.end()) {
                             ce_map_it = ce_map_.find(ce_sym_map_it->second);
                             tab.off[i] = ce_map_it->second.offset;
                             if (tab.first_offset == -1)
                                 tab.first_offset = i;
                             if ((unsigned int)tab.last_offset < i)
                                 tab.last_offset = i;
                         }
                     }
                     else {

                         next_elms.insert (*ce_elms_it);
                         if (tab.off[i] == UINT_MAX) {
                             ++ next_n_ce_tab_num_;
                             tab.off[i] = next_n_ce_tab_num_ | 0x80000000;
                         }
                         if (tab.first_offset == -1)
                             tab.first_offset = i;
                         if ((unsigned int)tab.last_offset < i)
                             tab.last_offset = i;
                     }
                 }
             }
             // now recursively call gen_n_ce_tables if any collating
             // elements with this character were found;
             if (next_elms.size() != 0)
                 gen_n_ce_tables (next_elms, idx + 1, next_n_ce_tab_num_);
         }
         n_ce_offs_.insert(std::make_pair(tab_num, tab));

     }
 }


 void Def::gen_w_ce_tables (const std::set<std::string>ce_elms,
                            unsigned int idx, unsigned int tab_num)
 {
     if (ce_elms.size() > 0) {
         ce_offset_tab_t tab;
         for (unsigned int k = 0; k <= UCHAR_MAX; ++k) {
             tab.off[k] = UINT_MAX;
         }
         tab.first_offset = -1;
         tab.last_offset = 0;
         ce_sym_map_iter ce_sym_map_it;
         ce_map_iter ce_map_it;

         std::set<std::string>::const_iterator ce_elms_it;
         std::set<std::string> next_elms;

         for (unsigned int i = 0; i <= UCHAR_MAX; ++i) {
             next_elms.clear();
             for (ce_elms_it = ce_elms.begin(); ce_elms_it != ce_elms.end();
                  ce_elms_it ++) {

                 if ((unsigned char)i == (unsigned char)(*ce_elms_it)[idx]) {
                     if ((*ce_elms_it).size() == idx + 1) {
                         if ((ce_sym_map_it = ce_wsym_map_.find (*ce_elms_it))
                             != ce_wsym_map_.end()) {
                             ce_map_it = ce_map_.find(ce_sym_map_it->second);
                             tab.off[i] = ce_map_it->second.offset;
                             if (tab.first_offset == -1)
                                 tab.first_offset = i;
                             if ((unsigned int)tab.last_offset < i)
                                 tab.last_offset = i;
                         }
                     }
                     else {
                         next_elms.insert (*ce_elms_it);
                         if (tab.off[i] == UINT_MAX) {
                             ++ next_w_ce_tab_num_;
                             tab.off[i] = next_w_ce_tab_num_ | 0x80000000;
                         }
                         if (tab.first_offset == -1)
                             tab.first_offset = i;
                         if ((unsigned int)tab.last_offset < i)
                             tab.last_offset = i;
                     }
                 }
             }
             // now recursively call gen_w_ce_tables if any collating
             // elements with this character were found;
             if (next_elms.size() != 0)
                 gen_w_ce_tables (next_elms, idx + 1, next_w_ce_tab_num_);
         }
         w_ce_offs_.insert(std::make_pair(tab_num, tab));
     }
 }


 void Def::dump_collate ()
 {
     std::cout << "LC_COLLATE\n";

     const std::map<std::string, wchar_t>& w_cmap = charmap_.get_w_cmap ();

     token_list_t::iterator sl_it = sym_list_.begin ();
     for (; sl_it != sym_list_.end (); ++sl_it) {
         std::cout << sl_it->name << " % ";
         cs_map_iter cs_it = cs_map_.find (sl_it->name);
         if (cs_it != cs_map_.end ()) {
             std::cout << cs_it->second << '\n';
             continue;
         }

         ce_map_iter ce_it = ce_map_.find (sl_it->name);
         if (ce_it != ce_map_.end ()) {
             std::cout << ce_it->second.coll_val << '\n';
             continue;
         }

         std::map<std::string, wchar_t >::const_iterator cw_it =
             w_cmap.find (sl_it->name);
         if (cw_it != w_cmap.end ()) {
             coll_map_iter cm_it =
                 coll_map_.find (cw_it->second);
             if (cm_it != coll_map_.end ()) {
                 std::cout << cm_it->second.coll_val << '\n';
                 continue;
             }
         }
         std::cout << '\n';
     }

     std::cout << "\n\n";

     std::list<collate_section_t>::iterator sect_it =
         section_list_.begin ();
     while (sect_it != section_list_.end ()) {

         // dump the orders
         std::cout << "order_start " << sect_it->name;
         token_list_t::iterator o_it = sect_it->order.begin ();
         for (; o_it != sect_it->order.end (); ++o_it)
             std::cout << ";" << o_it->name;
         std::cout << '\n';

         // for each entry in the entries list
         collate_entry_list_t::iterator e_it = sect_it->entries.begin ();
         for (; e_it != sect_it->entries.end (); ++e_it) {

             // dump the collation definition (weights included)
             std::cout << e_it->first.name << " ";
             if (!e_it->second.empty ()) {
                 token_list_t::iterator w_it = e_it->second.begin ();
                 std::cout << (w_it++)->name;
                 for (; w_it != e_it->second.end (); ++w_it)
                     std::cout << ";" << w_it->name;
             }

             // lookup the value associated with this collation symbol
             cs_map_iter cs_it = cs_map_.find (e_it->first.name);
             if (cs_it != cs_map_.end ()) {
                 std::cout << " % " << cs_it->second << '\n';
                 continue;
             }

             ce_map_iter ce_it = ce_map_.find (e_it->first.name);
             if (ce_it != ce_map_.end ()) {
                 std::cout << " % " << ce_it->second.coll_val << '\n';
                 continue;
             }

             std::map<std::string, wchar_t >::const_iterator cw_it =
                 w_cmap.find (e_it->first.name);
             if (cw_it != w_cmap.end ()) {
                 coll_map_iter cm_it =
                     coll_map_.find (cw_it->second);
                 if (cm_it != coll_map_.end ()) {
                     std::cout << " % " << cm_it->second.coll_val << '\n';
                     continue;
                 }

             }
             std::cout << '\n';
         }

         std::cout << "order_end\n";
         ++sect_it;
     }
     std::cout << "END LC_COLLATE\n";
 }


 void Def::write_collate (std::string dir_name)
 {
     assert (!dir_name.empty());

     static const char lc_name[] = "LC_COLLATE";

     if (collate_written_)
         return;

     if (!collate_def_found_) {
         issue_diag (I_SKIP, false, 0,
                     "%s section not found, skipping\n", lc_name);
         return;
     }

     next_tab_num_ = 0;
     next_wchar_coll_tab_num_ = 0;
     std::set<std::string> ce_strs;
     ce_sym_map_iter it;
     ce_map_iter ce_mit;
     for (it = ce_sym_map_.begin (); it != ce_sym_map_.end (); ++it) {
         if ((ce_mit = ce_map_.find (it->second))->second.coll_val
             != UINT_MAX) {
             // check to see of the largest_ce needs to be changed
             if (ce_mit->second.ce_wstr.size() + 1
                 > collate_out_.largest_ce)
                 collate_out_.largest_ce = ce_mit->second.ce_wstr.size();

             ce_strs.insert (it->first);
         }
     }
     next_n_ce_tab_num_ = 0;
     gen_n_ce_tables(ce_strs, 0, 0);
     ce_strs.clear();
     for (it = ce_wsym_map_.begin (); it != ce_wsym_map_.end (); ++it) {
         if (ce_map_.find (it->second)->second.coll_val != UINT_MAX)
             ce_strs.insert (it->first);
     }
     next_w_ce_tab_num_ = 0;
     gen_w_ce_tables (ce_strs, 0, 0);

     gen_n_to_w_coll_tables ("", next_tab_num_);
     gen_w_to_n_coll_tables ("", next_wchar_coll_tab_num_);

     (dir_name += _RWSTD_PATH_SEP) += lc_name;

     issue_diag (I_OPENWR, false, 0, "writing %s\n", dir_name.c_str ());

     std::ofstream out (dir_name.c_str(), std::ios::binary);
     out.exceptions (std::ios::failbit | std::ios::badbit);

     unsigned int i;

     // calculate the size of an individual weight element
     collate_out_.elm_size = collate_out_.num_weights
         * collate_out_.longest_weight * sizeof (unsigned int) +
         sizeof (unsigned int);

     // the first section of the collate database is the collating
     // element information
     collate_out_.n_ce_tab_off = 0;
     collate_out_.w_ce_tab_off = collate_out_.n_ce_tab_off;
     n_ce_offs_iter n_ce_offs_it;
     for (n_ce_offs_it = n_ce_offs_.begin();
          n_ce_offs_it != n_ce_offs_.end (); ++n_ce_offs_it) {
         collate_out_.w_ce_tab_off += (n_ce_offs_it->second.last_offset
                                       - n_ce_offs_it->second.first_offset + 1)* sizeof (int);
     }

     // next comes the weight information
     collate_out_.weight_tab_off = collate_out_.w_ce_tab_off;
     w_ce_offs_iter w_ce_offs_it;
     for (w_ce_offs_it = w_ce_offs_.begin();
          w_ce_offs_it != w_ce_offs_.end(); ++w_ce_offs_it) {
         collate_out_.weight_tab_off += (w_ce_offs_it->second.last_offset
                                         - w_ce_offs_it->second.first_offset + 1)* sizeof (int);
     }

     coll_map_iter coll_map_pos;

     collate_out_.num_elms = off_mapr_.size();
     if (collate_out_.undefined_optimization)
         ++collate_out_.num_elms;

     // now calculate the offset for the first narrow character table
     collate_out_.n_char_tab_off = collate_out_.weight_tab_off
         + collate_out_.num_elms * collate_out_.elm_size;

     // now calculate the offset fo the fist wide character table
     // but first we need the size of the narrow tables
     char_offs_iter char_offs_it;
     unsigned int char_offs_size = 0;
     for (char_offs_it = char_offs_.begin();
          char_offs_it != char_offs_.end(); ++char_offs_it) {
         char_offs_size += (UCHAR_MAX + 1
                            - char_offs_it->second.first_offset)
             * sizeof (unsigned int);
     }

     collate_out_.w_char_tab_off = collate_out_.n_char_tab_off
         + char_offs_size;

     // now calculate the offset for the narrow character offset table
     // but first we need the size of the wide tables
     w_to_n_coll_iter w_to_n_coll_it;
     unsigned int w_to_n_size = 0;
     for (w_to_n_coll_it = w_to_n_coll_.begin();
          w_to_n_coll_it != w_to_n_coll_.end(); ++w_to_n_coll_it) {
         w_to_n_size += (UCHAR_MAX + 1
                         - w_to_n_coll_it->second.first_offset)
             * sizeof (unsigned int);
     }

     collate_out_.n_char_off_tab_off = collate_out_.w_char_tab_off
         + w_to_n_size;

     // now calculate the offset for the wide character offset table
     collate_out_.w_char_off_tab_off = collate_out_.n_char_off_tab_off
         + char_offs_.size() * sizeof (unsigned int);

     // calculate the offset for the narrow collating element offset table
     collate_out_.n_ce_off_tab_off = collate_out_.w_char_off_tab_off
         + w_to_n_coll_.size() * sizeof (unsigned int);

     // calculate the offset for the wide collating element offset table
     collate_out_.w_ce_off_tab_off = collate_out_.n_ce_off_tab_off
         + n_ce_offs_.size() * sizeof (unsigned int);

     // now calculate the offset of the first character information
     collate_out_.n_char_first_char_off = collate_out_.w_ce_off_tab_off
         + w_ce_offs_.size() * sizeof (unsigned int);

     // now calculate the offset of the wide table first char info
     collate_out_.w_char_first_char_off = collate_out_.n_char_first_char_off
         + char_offs_.size() * sizeof (unsigned char);

     // now calculate the offset of the narrow ce first character info
     collate_out_.n_ce_first_char_off = collate_out_.w_char_first_char_off
         + w_to_n_coll_.size() * sizeof (unsigned char);

     // now calculate the offset of the wide ce first character info
     collate_out_.w_ce_first_char_off = collate_out_.n_ce_first_char_off
         + n_ce_offs_.size() * sizeof (unsigned char);

     // now calculate the offset of the narrow ce last character info
     collate_out_.n_ce_last_char_off = collate_out_.w_ce_first_char_off
         + w_ce_offs_.size() * sizeof (unsigned char);

     // now calculate the offset of the wide ce last character info
     collate_out_.w_ce_last_char_off = collate_out_.n_ce_last_char_off
         + n_ce_offs_.size() * sizeof (unsigned char);

     // now calculate the offset of the codeset name
     collate_out_.codeset_off = collate_out_.w_ce_last_char_off
         + w_ce_offs_.size() * sizeof (unsigned char);

     // finally calculate the offset of the charmap name
     collate_out_.charmap_off = collate_out_.codeset_off
         + charmap_.get_code_set_name().size() + 1;


     // print out the collate struct
     out.write ((char*)&collate_out_, sizeof(collate_out_));

     for (n_ce_offs_it = n_ce_offs_.begin();
          n_ce_offs_it != n_ce_offs_.end(); ++n_ce_offs_it) {
         for (i = (unsigned int)n_ce_offs_it->second.first_offset;
              i <= (unsigned int)n_ce_offs_it->second.last_offset; ++i)
             out.write ((char*)&n_ce_offs_it->second.off[i],
                        sizeof (n_ce_offs_it->second.off[i]));
     }

     for (w_ce_offs_it = w_ce_offs_.begin();
          w_ce_offs_it != w_ce_offs_.end(); ++w_ce_offs_it) {
         for (i = (unsigned int)w_ce_offs_it->second.first_offset;
              i <= (unsigned int)w_ce_offs_it->second.last_offset; ++i)
             out.write ((char*)&w_ce_offs_it->second.off[i],
                        sizeof (w_ce_offs_it->second.off[i]));
     }


     // now print out the weight array
     unsigned int maxw = UINT_MAX;
     Weights_t* weights;
     bool undefined_written = false;

     const std::map<std::string, wchar_t>& w_cmap = charmap_.get_w_cmap ();

     std::size_t off_idx = 0;
     off_mapr_iter current_off = off_mapr_.begin();
     for (; current_off != off_mapr_.end(); ++off_idx) {

         // check the current offset
         if (current_off->first != off_idx
             && off_idx != undef_char_info_.offset)
             issue_diag (E_RANGE, true, 0,
                         "current_off : %d, index : %d, giving up\n",
                         current_off->first, off_idx);

         if (collate_out_.undefined_optimization
             && off_idx == undef_char_info_.offset) {
             undefined_written = true;
             weights = undef_char_info_.weights;
             out.write ((char*)&undef_char_info_.order,
                        sizeof (undef_char_info_.order));
             for (int j = 0; j < collate_out_.num_weights; ++j) {
                 for (int k = 0; k < weights[j].size; ++k) {
                     out.write ((char*)&weights[j].weight[k],
                                sizeof (weights[j].weight[k]));
                 }
                 for (int c = weights[j].size;
                      c < collate_out_.longest_weight; ++c)
                     out.write ((char*)&maxw, sizeof (maxw));
             }
             continue;
         }

         w_cmap_iter w_cmap_pos = w_cmap.find
             (current_off->second);
         if (w_cmap_pos != w_cmap.end()) {
             coll_map_pos = coll_map_.find (w_cmap_pos->second);
             out.write ((char*)&coll_map_pos->second.order,
                        sizeof (coll_map_pos->second.order));
             weights = coll_map_pos->second.weights;
             for (int j = 0; j < collate_out_.num_weights; ++j) {
                 for (int k = 0; k < weights[j].size; ++k) {
                     out.write ((char*)&weights[j].weight[k],
                                sizeof (weights[j].weight[k]));
                 }
                 for (int c = weights[j].size;
                      c < collate_out_.longest_weight; ++c)
                     out.write ((char*)&maxw, sizeof (maxw));
             }
             ++current_off;
         }

         else {
             ce_map_iter ce_map_it = ce_map_.find (current_off->second);
             if (ce_map_it != ce_map_.end()
                 && ce_map_it->second.coll_val != UINT_MAX) {
                 out.write ((char*)&ce_map_it->second.order,
                            sizeof (ce_map_it->second.order));
                 weights = ce_map_it->second.weights;
                 for (int j = 0; j < collate_out_.num_weights; ++j) {
                     for (int k = 0; k < weights[j].size; ++k) {
                         out.write ((char*)&weights[j].weight[k],
                                    sizeof (weights[j].weight[k]));
                     }
                     for (int c = weights[j].size;
                          c < collate_out_.longest_weight; ++c)
                         out.write ((char*)&maxw, sizeof (maxw));
                 }

             }
             ++current_off;
         }
     }

     // maske sure that we got to the undefined value
     if (!undefined_written && collate_out_.undefined_optimization) {
         if (off_idx != undef_char_info_.offset)
             issue_diag (E_RANGE, true, 0,
                         "current_off : %d, index : %d, giving up\n",
                         undef_char_info_.offset, off_idx);

         weights = undef_char_info_.weights;
         out.write ((char*)&undef_char_info_.order,
                    sizeof (undef_char_info_.order));
         for (int j = 0; j < collate_out_.num_weights; ++j) {
             for (int k = 0; k < weights[j].size; ++k) {
                 out.write ((char*)&weights[j].weight[k],
                            sizeof (weights[j].weight[k]));
             }
             for (int c = weights[j].size;
                  c < collate_out_.longest_weight; ++c)
                 out.write ((char*)&maxw, sizeof (maxw));
         }
     }

     // now print out the narrow character tables
     for (char_offs_it = char_offs_.begin();
          char_offs_it != char_offs_.end(); ++char_offs_it) {
         for (unsigned int c = char_offs_it->second.first_offset;
              c <= UCHAR_MAX; ++c) {
             out.write ((const char*)&char_offs_it->second.off[c],
                        sizeof (char_offs_it->second.off[c]));
         }
     }

     // now print out the wide character tables
     for (w_to_n_coll_it = w_to_n_coll_.begin();
          w_to_n_coll_it != w_to_n_coll_.end(); ++w_to_n_coll_it) {
         for (unsigned int c = w_to_n_coll_it->second.first_offset;
              c <= UCHAR_MAX; ++c) {
             out.write ((const char*)&w_to_n_coll_it->second.off[c],
                        sizeof (w_to_n_coll_it->second.off[c]));
         }
     }

     // now print the narrow character table offsets
     unsigned int current_offset = 0;
     for (char_offs_it = char_offs_.begin();
          char_offs_it != char_offs_.end(); ++char_offs_it) {
         out.write ((const char*)&current_offset, sizeof (current_offset));
         current_offset += (UCHAR_MAX + 1
                            - char_offs_it->second.first_offset);
     }

     // now print the wide character table offsets
     current_offset = 0;
     for (w_to_n_coll_it = w_to_n_coll_.begin();
          w_to_n_coll_it != w_to_n_coll_.end(); ++w_to_n_coll_it) {
         out.write ((const char*)&current_offset, sizeof (current_offset));
         current_offset += (UCHAR_MAX + 1
                            - w_to_n_coll_it->second.first_offset);
     }

     // now print the narrow ce table offsets
     current_offset = 0;
     for (n_ce_offs_it = n_ce_offs_.begin();
          n_ce_offs_it != n_ce_offs_.end(); ++n_ce_offs_it) {
         out.write ((const char*)&current_offset, sizeof (current_offset));
         current_offset += (n_ce_offs_it->second.last_offset
                            - n_ce_offs_it->second.first_offset + 1);
     }

     // now print the wide ce table offsets
     current_offset = 0;
     for (w_ce_offs_it = w_ce_offs_.begin();
          w_ce_offs_it != w_ce_offs_.end(); ++w_ce_offs_it) {
         out.write ((const char*)&current_offset, sizeof (current_offset));
         current_offset += (w_ce_offs_it->second.last_offset
                            - w_ce_offs_it->second.first_offset + 1);
     }


     // now print out the narrow character tables starting character
     for (char_offs_it = char_offs_.begin();
          char_offs_it != char_offs_.end(); ++char_offs_it) {
         const char off = char ((char_offs_it->second).first_offset);
         out << off;
     }

     // now print out the wide character tables starting character
     for (w_to_n_coll_it = w_to_n_coll_.begin();
          w_to_n_coll_it != w_to_n_coll_.end(); ++w_to_n_coll_it) {
         const char off = char ((w_to_n_coll_it->second).first_offset);
         out << off;
     }

     // now print out the narrow ce tables starting character
     for (n_ce_offs_it = n_ce_offs_.begin();
          n_ce_offs_it != n_ce_offs_.end(); ++n_ce_offs_it) {
         const char off = char ((n_ce_offs_it->second).first_offset);
         out << off;
     }

     // now print out the wide ce tables starting character
     for (w_ce_offs_it = w_ce_offs_.begin();
          w_ce_offs_it != w_ce_offs_.end(); ++w_ce_offs_it) {
         const char off = char ((w_ce_offs_it->second).first_offset);
         out << off;
     }

     // now print out the narrow ce tables ending character
     for (n_ce_offs_it = n_ce_offs_.begin();
          n_ce_offs_it != n_ce_offs_.end(); ++n_ce_offs_it) {
         const char off = char ((n_ce_offs_it->second).last_offset);
         out << off;
     }

     // now print out the wide ce tables ending character
     for (w_ce_offs_it = w_ce_offs_.begin();
          w_ce_offs_it != w_ce_offs_.end(); ++w_ce_offs_it) {
         const char off = char ((w_ce_offs_it->second).last_offset);
         out << off;
     }

     // finally write the codeset and charmap names
     out << charmap_.get_code_set_name() << std::ends
         << charmap_.get_charmap_name() << std::ends;
 }


 void Def::init_coll_map() {

     rw_cmap_iter rw_cmap_pos;
     collate_info_t tmp = {UINT_MAX, UINT_MAX, 0, 0};

     for (rw_cmap_pos = charmap_.get_rw_cmap().begin();
          rw_cmap_pos != charmap_.get_rw_cmap().end();
          ++rw_cmap_pos) {
         coll_map_.insert (std::make_pair (rw_cmap_pos->first, tmp));
     }
 }


 // In the preprocess_collate member function the collate section is
 // preprocessed and the result of the preprocessing is stored in-memory
 // as a number of lists; the content of these lists is then preprocessed
 // acoording to the reorder statements; the result is then passed
 // on to process_collate.
 void Def::preprocess_collate ()
 {
     int nesting_level = 0;

     while (true) {
         // fetch next token
         next = scanner_.next_token();

         switch (next.token) {
         case Scanner::tok_end:
             next = scanner_.next_token();
             if (next.token == Scanner::tok_collate) {
                 // end of collation block
                 if (nesting_level == 0)
                     return;

                 nesting_level--;
                 scanner_.close ();
             } else
                 issue_diag (E_SYNTAX, true, &next,
                             "wrong section name in END directive\n");

             break;
         case Scanner::tok_copy: {
             next = scanner_.next_token();
             if (next.token != Scanner::tok_string)
                 issue_diag (E_SYNTAX, true, &next,
                             "expected string following \"copy\" directive\n");

             // bump up the nesting level
             ++nesting_level;

             issue_diag (I_STAGE, false, 0, "processing copy directive\n");

             // open the file
             scanner_.open (get_pathname (strip_quotes (next.name), next.file));

             // get comment char and escape char;
             // these informations are stored by the scanner
             while ((next = scanner_.next_token ()).token
                    != Scanner::tok_collate ){
                 // the LC_IDENTIFICATION section may also have a
                 // LC_COLLATE token that will mess up the parsing
                 if (next.token == Scanner::tok_ident) {
                     while ((next = scanner_.next_token()).token
                            != Scanner::tok_end );
                     next = scanner_.next_token();
                 }
             }

             break;
         }

         // a collating element definition defines a symbolic name that
         // represents symbolically the congregation of two other symbolic
         // names which have to be present in the character map;
         // the form of the phrase is:
         // collating-element sym from ("string" | (sym sym+))
         case Scanner::tok_coll_elem: {
             next = scanner_.next_token();
             // we expect a symbolic name
             if (next.token != Scanner::tok_sym_name)
                 issue_diag (E_SYNTAX, true, &next,
                             "symbolic name expected following "
                             "collating-element\n");
             // symbolic name
             token_t sym (next);

             next = scanner_.next_token ();
             if (next.token != Scanner::tok_from)
                 issue_diag (E_SYNTAX, true, &next,
                             "illegal collating-element expression\n");

             token_list_t ce_def_list;
             next = scanner_.next_token ();
             if (next.token == Scanner::tok_string) {
                 ce_def_list.push_back (next);
                 ce_list_.push_back (std::make_pair(sym,ce_def_list));
             } else if (next.token == Scanner::tok_sym_name) {
                 do {
                     ce_def_list.push_back (next);
                     next = scanner_.next_token();
                 } while (next.token != Scanner::tok_nl);
                 ce_list_.push_back (std::make_pair(sym,ce_def_list));
             } else
                 issue_diag (E_SYNTAX, true, &next,
                             "illegal collating-element expression\n");
             break;
         }
         case Scanner::tok_coll_sym:
             next = scanner_.next_token();
             if (next.token != Scanner::tok_sym_name)
                 issue_diag (E_SYNTAX, true, &next,
                             "symbolic name expected following "
                             "collating-symbol\n");
             cs_list_.push_back (next);
             break;
         case Scanner::tok_script:
             next = scanner_.next_token ();
             if (next.token != Scanner::tok_sym_name)
                 issue_diag (W_SYNTAX, false, &next,
                             "expecting script name, got %s\n",
                             next.name.c_str ());
             script_list_.push_back(next);
             break;
         case Scanner::tok_sym_name:
             sym_list_.push_back (next);
             break;
         case Scanner::tok_order_start:
             preprocess_order();
             break;

         case Scanner::tok_reorder:
             preprocess_reorder ();
             break;

         case Scanner::tok_reorder_section:
             preprocess_reorder_section ();
             break;

         default:
             break;
         }
     }

 }

 // The task of preprocess_order is to parse and model the content of the
 // order sections in the input files
 void Def::preprocess_order ()
 {
     // one order-start keyword has been encountered;
     // push a collate_section down the list and use it
     while (next.token != Scanner::tok_order_end) {
         section_list_.push_back (collate_section_t());
         collate_section_t& section = section_list_.back ();

         next = scanner_.next_token ();

         // store the orders
         while (next.token != Scanner::tok_nl) {
             if (next.token == Scanner::tok_sym_name) {
                 // symbolic name, has to be a section name
                 section.name = next.name;
             } else {
                 // might be a combination of order and position
                 // do the same until a better way is found
                 section.order.push_back (next);
             }

             // fetch next token
             next = scanner_.next_token ();
         }

         // unnamed sections
         if (section.name == "")
             section.name = "unnamed";

         issue_diag (I_STAGE, false, 0,
                     "processing %s order\n", section.name.c_str ());

         // store the collation statements
         while (!(next.token == Scanner::tok_order_end ||
                  next.token == Scanner::tok_order_start )) {

             next = scanner_.next_token ();
             switch (next.token) {
             case Scanner::tok_end:
                 issue_diag (E_SYNTAX, true, &next,
                             "unexpected END directive while "
                             "parsing collate section\n");
             case  Scanner::tok_nl:
                 continue;
             case Scanner::tok_comment:
                 scanner_.ignore_line ();
                 break;
             case Scanner::tok_undefined:
             case Scanner::tok_sym_name:
             {
                 token_t sym (next);
                 section.entries.push_back (
                     std::make_pair(sym,token_list_t()));
                 collate_entry_t& entry = section.entries.back ();

                 next = scanner_.next_token ();
                 while (next.token != Scanner::tok_nl) {
                     entry.second.push_back (next);
                     next = scanner_.next_token ();
                 }
                 break;
             }
                         default:
                                 break;
             }
         }
     }
 }


 // Handles the reorder statements in the form:
 //    reorder-after <sym>
 //    <sym> <sym><sym>...
 // OR
 //    reorder-after <sym>
 //    ..  <sym><sym>.....
 // followed by reorder-end statement or another reorder-after
 void Def::preprocess_reorder ()
 {
     while (true) {
         // process one or more statements grouped
         // under a reorder section

         // retrieve the symbol
         next = scanner_.next_token ();
         if (next.token != Scanner::tok_sym_name)
             issue_diag (E_SYNTAX, true, &next,
                        "expecting symbolic name, got %s\n",
                        next.name.c_str ());

         // store the symbolic name
         token_t sym (next);
         collate_entry_list_t entries_list;
         while (true) {

             // process the statements
             next = scanner_.next_token ();

             if (next.token == Scanner::tok_nl )
                 continue;
             else if (   next.token == Scanner::tok_sym_name
                      || next.token == Scanner::tok_hex_ellipsis) {
                 // the line will contain one single symbolic name
                 // or a complete collation statement
                 collate_entry_t entry;
                 entry.first = next;

                 next = scanner_.next_token ();
                 while (next.token != Scanner::tok_nl) {
                     entry.second.push_back (next);
                     next = scanner_.next_token ();
                 }

                 // add the entry to the list
                 entries_list.push_back (entry);
             } else if (next.token == Scanner::tok_reorder) {
                 // call insert_entry
                 if (!entries_list.empty ())
                     insert_entries (sym, entries_list);
                 entries_list.clear ();
                 break;
             } else if (next.token == Scanner::tok_reorder_end ) {
                 // call insert_entry
                 // call insert_entry
                 if (!entries_list.empty ())
                     insert_entries (sym, entries_list);
                 return;
             } else {
                 issue_diag (E_SYNTAX, true, &next,
                             "unexpected token : %s in a reorder block\n",
                             next.name.c_str ());
             }
         }
     }
 }


 void Def::preprocess_reorder_section ()
 {
     while (next.token != Scanner::tok_reorder_section_end) {
         // process one or more statements grouped
         // under a reorder section

         // retrieve the symbol
         next = scanner_.next_token ();
         token_t sym (next);

         // process the statements
         while (!(next.token == Scanner::tok_reorder_section ||
                  next.token == Scanner::tok_reorder_section_end)) {

             next = scanner_.next_token ();
             switch (next.token) {
             case Scanner::tok_nl:
                 continue;
             case Scanner::tok_sym_name: {
                 // the line will contain one single symbolic name
                 // which is the name of a section
                 next = scanner_.next_token ();
                 if (next.token != Scanner::tok_sym_name) {
                     issue_diag (E_SYNTAX, true, &next,
                                 "expecting section name, got %s\n",
                                 next.name.c_str ());
                 }

                 std::list<collate_section_t>::iterator beg =
                     section_list_.begin ();
                 std::list<collate_section_t>::iterator end =
                     section_list_.end ();
                 std::list<collate_section_t>::iterator ref_it, mov_it;
                 for (ref_it = beg;
                      ref_it->name != sym.name && ref_it != end;
                      ++ref_it);
                 for (mov_it = beg;
                      mov_it->name != next.name && mov_it != end;
                      ++mov_it);

                 if (ref_it == end || mov_it == end)
                     issue_diag (E_SYNTAX, true, &next,
                                 "missing sections %s %s requested\n",
                                 sym.name.c_str (), next.name.c_str ());

                 // replace it
                 section_list_.splice (++ref_it, section_list_,
                                       mov_it, mov_it);

                 // replace the insertion point
                 sym = next;
                 break;
             }
             default:
                 issue_diag (E_SYNTAX, true, &next,
                             "unexpected token : %s in a "
                             "reorder section block\n",
                             next.name.c_str ());
                 break;
             }

         }
     }
 }


 void Def::list_collate ()
 {
     // print the script list
     token_list_t::iterator script_it = script_list_.begin ();
     while (script_it != script_list_.end ()) {
         std::cout << "script  " << (script_it++)->name << '\n';
     }
     std::cout << '\n';

     // print the collating symbols' list
     token_list_t::iterator cs_it = cs_list_.begin ();
     while (cs_it != cs_list_.end ()) {
         std::cout << "collating-symbol     " << (cs_it++)->name << '\n';
     }
     std::cout << '\n';

     // print the collating elements list
     std::list<collate_elem_t>::iterator ce_it = ce_list_.begin ();
     while (ce_it != ce_list_.end ()) {
         std::cout << "collating-element " << ce_it->first.name
                   << " from ";
         token_list_t::iterator ce_def_it = ce_it->second.begin ();
         for (;ce_def_it != ce_it->second.end (); ce_def_it++) {
             std::cout << ce_def_it->name << ";";
         }
         std::cout << '\n';
     }
     std::cout << '\n';

     // print out general/global symbols' list
     token_list_t::iterator sym_it = sym_list_.begin ();
     while (sym_it != sym_list_.end ()) {
         std::cout << (sym_it++)->name << '\n';
     }
     std::cout << '\n';

     // print out sections
     std::list<collate_section_t>::iterator sc_it =
         section_list_.begin ();
     while (sc_it != section_list_.end ()) {
         // print prolog
         std::cout << "order_start ";
         if (sc_it->name != "unnamed")
             std::cout << sc_it->name << ";";

         token_list_t::iterator it = sc_it->order.begin ();
         while (it != sc_it->order.end ())
             std::cout << (it++)->name << ";";
         std::cout << '\n';

         std::list<collate_entry_t>::iterator e_it =
             sc_it->entries.begin ();
         while (e_it != sc_it->entries.end ()) {
             std::cout << e_it->first.name << "   ";
             token_list_t::iterator w_it =  e_it->second.begin ();
             while (w_it != e_it->second.end ())
                 std::cout << (w_it++)->name << ";";
             std::cout << '\n';
             ++e_it;
         }

         ++sc_it;
     }
     std::cout << '\n' << "order_end\n";
 }


 // FIXME - modify the algorithm to get log(N) complexity
 // Re-inserts the entries in `e' right after the item named by the
 // reference symbol `s'. Every entry is first taken out of its current
 // place with remove_entry(). The reference is then looked up linearly,
 // first in sym_list_ and after that in the entries of each section.
 // Returns true when the reference was found and the entries were
 // inserted; otherwise a warning is issued and false is returned.
 bool Def::insert_entries (token_t& s, collate_entry_list_t& e)
 {
     // take the entries out of wherever they currently are
     for (collate_entry_list_t::iterator old = e.begin ();
          old != e.end (); ++old)
         remove_entry (*old);

     // look for the reference among the symbols list first
     token_list_t::iterator sym_pos = sym_list_.begin ();
     while (sym_pos != sym_list_.end () && sym_pos->name != s.name)
         ++sym_pos;

     if (sym_pos != sym_list_.end ()) {
         // FIXME - all statements have to have no weights if they are
         // to be inserted after collation symbols outside section
         // boundaries; only the first entry is checked here
         if (!e.begin ()->second.empty ()) {
             issue_diag (E_REORD, true, &s,
                         "requested reorder-after: reference "
                         "symbol %s was found outside section boundaries "
                         "and the objects to be reordered are collation "
                         "definitions\n", s.name.c_str ());
         }

         // chain the symbolic names behind the reference, each one
         // following the one inserted before it
         for (collate_entry_list_t::iterator src = e.begin ();
              src != e.end (); ++src) {
             ++sym_pos;
             sym_pos = sym_list_.insert (sym_pos, src->first);
         }

         return true;
     }

     // not a free-standing symbol: try the entries of every section
     for (std::list<collate_section_t>::iterator sec =
              section_list_.begin ();
          sec != section_list_.end (); ++sec) {

         collate_entry_list_t::iterator ref = sec->entries.begin ();
         while (ref != sec->entries.end () && ref->first.name != s.name)
             ++ref;

         if (ref != sec->entries.end ()) {
             // found it; the whole batch goes right behind it
             ++ref;
             sec->entries.insert (ref, e.begin (), e.end ());
             return true;
         }
     }

     issue_diag (W_REORD, false, &s,
                 "requested reorder-after: reference "
                 "symbol %s was not found \n", s.name.c_str ());

     return false;
 }


 // FIXME - modify the algorithm to get log(N) complexity
 // Erases the first item whose name equals e.first.name. An entry with
 // no tokens in e.second is looked up in sym_list_ first; if it is not
 // there, or if the entry does carry tokens, the entries of each section
 // are searched in turn. At most one item is erased; nothing happens
 // when no match exists.
 void Def::remove_entry (collate_entry_t& e)
 {
     if (e.second.empty ()) {
         // bare symbol: it may live in the symbols list
         token_list_t::iterator sym_pos = sym_list_.begin ();
         while (sym_pos != sym_list_.end ()
                && sym_pos->name != e.first.name)
             ++sym_pos;

         if (sym_pos != sym_list_.end ()) {
             sym_list_.erase (sym_pos);
             return;
         }
     }

     // otherwise look through the sections, one after another
     for (std::list<collate_section_t>::iterator sec =
              section_list_.begin ();
          sec != section_list_.end (); ++sec) {

         std::list<collate_entry_t>::iterator pos = sec->entries.begin ();
         while (pos != sec->entries.end ()
                && pos->first.name != e.first.name)
             ++pos;

         if (pos != sec->entries.end ()) {
             sec->entries.erase (pos);
             return;
         }
     }
 }


 // Computes the weight described by the token `w' and stores it in the
 // `weights' array, starting at index `weight_num':
 //
 //   tok_sym_name      the name is looked up in the wide character map,
 //                     then in the collating-element map, then in the
 //                     collating-symbol map; one value is stored
 //   tok_char_value    escape sequences given in numerical form; entries
 //                     from weight_num up to collate_out_.num_weights
 //                     are filled in
 //   tok_ignore        the special weight 0
 //   tok_string        quoted string made of <symbolic-name>s or of an
 //                     escape sequence; also updates
 //                     collate_out_.longest_weight
 //   tok_abs_ellipsis  nothing is stored beyond the default size of 1
 //
 // Any other token produces a warning. Returns true only when `w' is an
 // absolute ellipsis, false in all other cases.
 bool Def::get_weight ( token_t&     w,
                        Weights_t*   weights,
                        int          weight_num)
 {
     const std::map<std::string, wchar_t>& w_cmap = charmap_.get_w_cmap ();

     bool ret = false;
     weights[weight_num].size = 1;
     ce_map_iter ce_map_it;
     if(w.token == Scanner::tok_sym_name) {
         w_cmap_iter w_cmap_pos = w_cmap.find (w.name);
         if (w_cmap_pos != w_cmap.end()){
             coll_map_iter coll_map_pos = coll_map_.find(w_cmap_pos->second);

             // a character that is in the character map need not be in
             // the collation map; never dereference the end iterator
             // (the string branch below performs the same check)
             if (coll_map_pos == coll_map_.end())
                 issue_diag (E_SYMUSED, true, &w,
                             "weight %s not defined\n",
                             w.name.c_str());
             else {
                 if (coll_map_pos->second.coll_val == UINT_MAX)
                     issue_diag (E_SYMUSED, true,
                                 &w, "symbolic name %s "
                                 "used as weight before being assigned a "
                                 "collation value\n", w.name.c_str());

                 // store weight
                 weights[weight_num].weight[0] =
                     coll_map_pos->second.coll_val;
             }
         }
         else if ((ce_map_it = ce_map_.find (w.name))
                  != ce_map_.end())
             weights[weight_num].weight[0] = ce_map_it->second.coll_val;
         else {
             cs_map_iter cs_it = cs_map_.find (w.name);
             if (cs_it == cs_map_.end ())
                 issue_diag (E_SYNTAX, true,
                             &w, "symbolic name %s not found\n",
                             w.name.c_str());
             else
                 weights[weight_num].weight[0] = cs_it->second;
         }
     }
     else if (w.token == Scanner::tok_char_value) {
         // the weight is given in numerical form
         const char* next_val =
             std::strchr (w.name.c_str (), scanner_.escape_char ());

         assert (0 != next_val);

         // position of the separator that ends the current weight, if any
         const char* next_wt = std::strchr (w.name.c_str (), ';');

         while (weight_num < collate_out_.num_weights) {

             std::size_t c;

             // convert the escape sequences that precede the separator
             // (or the end of the string when there is no separator)
             for (c = 0; *next_val && (!next_wt || next_val < next_wt); ++c) {

                 const char* end = 0;

                 weights [weight_num].weight [c] =
                     scanner_.convert_escape (next_val, &end, true);

                 assert (0 != end);

                 next_val = end;
             }

             weights [weight_num++].size = c;

             // NOTE(review): if next_val is left pointing at the ';'
             // itself, this search finds that same ';' again and the
             // remaining weights all end up with size 0 -- confirm where
             // convert_escape() leaves `end'
             if (next_wt)
                 next_wt = std::strchr (next_val, ';');

         }

     }
     else if(w.token == Scanner::tok_ignore) {
         // use the special weight 0 for IGNORE weights
         weights[weight_num].size = 1;
         weights[weight_num].weight[0] = 0;
     }
     else if (w.token == Scanner::tok_string) {
         // the weights are given either in symbolic name form (e.g.,
         // "<symbolic-name>" or in the form of a quoted multibyte
         // character string (e.g., "\001\d010\x16\")

         // strip the first and the last character (the quotes)
         const std::string tmp (w.name.substr (1, w.name.size() - 2));

         // keeps track of the length of the weight
         unsigned char k = 0;

         // iterate thru the string content and retrieve the symbols
         std::string::const_iterator it = tmp.begin ();

         const char escape = scanner_.escape_char ();

         while (it != tmp.end ()) {
             // weight in string form
             std::string wsym;

             // next comes a symbolic name
             if (*it == '<') {
                 // copy everything up to and including the closing '>';
                 // a character preceded by the escape character is
                 // copied verbatim without the escape; the scan is
                 // bounded by the end of the string so that a missing
                 // '>' cannot run it off the end
                 bool closed = false;
                 while (it != tmp.end ()) {
                     if (*it == '>') {
                         wsym += *it++;
                         closed = true;
                         break;
                     }
                     if (*it == escape && ++it == tmp.end ())
                         break;

                     wsym += *it++;
                 }

                 if (!closed) {
                     issue_diag (E_SYNTAX, true, &w,
                                 "unterminated symbolic name %s "
                                 "in a weight\n", wsym.c_str());
                     break;
                 }

                 // wsym has the symbolic name, lookup for it in
                 // the character map, collating-symbol map,
                 // and collating-element map
                 w_cmap_iter w_cm_pos = w_cmap.find(wsym);
                 cs_map_iter cs_it = cs_map_.find (wsym);
                 ce_map_iter ce_it = ce_map_.find (wsym);

                 if (w_cm_pos != w_cmap.end()) {
                     // is in the character map, check its associated
                     // collation value
                     coll_map_iter coll_it = coll_map_.find(w_cm_pos->second);
                     if (coll_it == coll_map_.end())
                         issue_diag (E_SYMUSED, true, &w,
                                     "weight %s not defined\n",
                                     wsym.c_str());
                     else {
                         if (coll_it->second.coll_val == UINT_MAX)
                             issue_diag (E_SYMUSED, true,
                                         &w, "symbolic name %s "
                                         "used as weight before being "
                                         "assigned a "
                                         "collation value\n", wsym.c_str());

                         weights[weight_num].weight[k++] =
                             coll_it->second.coll_val;
                     }
                 }
                 else if (ce_it != ce_map_.end()) {
                     // it is in the collating-element map
                     weights[weight_num].weight[k++] = ce_it->second.coll_val;
                 }
                 else if (cs_it != cs_map_.end()) {
                     // it is in the collating-symbol map
                     weights[weight_num].weight[k++] = cs_it->second;
                 }
                 else {
                     // it is not in any of the maps, that's an error
                     issue_diag (E_SYNTAX, true, &w,
                                 "symbolic name %s not found\n",
                                 wsym.c_str());
                 }
             }
             else if (*it == escape) {

                 // weight is given in a quoted escape form
                 const char* const beg = tmp.c_str () + (it - tmp.begin ());
                 const char*       end = 0;

                 weights [weight_num].weight [k++] =
                     scanner_.convert_escape (beg, &end, true);

                 assert (0 != end);

                 it += end - beg;

                 // NOTE(review): the loop is left after the first escape
                 // sequence, so whatever follows it in the string is not
                 // examined -- confirm that convert_escape() consumes
                 // the complete sequence
                 break;
             }
             else {
                 issue_diag (E_SYNTAX, true, &w,
                             "illegal string content as a weight");

                 // `it' cannot advance past an illegal character
                 break;
             }
         }

         weights[weight_num].size = k;
         if (k > collate_out_.longest_weight)
             collate_out_.longest_weight = k;

     }
     else if (w.token == Scanner::tok_abs_ellipsis)
         // return true if ellipsis are embedded in the weight
         ret = true;
     else
         warnings_occurred_ =
             issue_diag (W_SYNTAX, false, &w,
                         "illegal token %s found in collation definition\n",
                         w.name.c_str()) || warnings_occurred_;

     return ret;
 }


 // if undefined optimization is on then only those characters that have
 // been defined should go into the valid set, otherwise all characters go
 // into the set.
 void Def::gen_valid_coll_wchar_set () {

     if (!valid_coll_wchar_set_.empty())
         return;

     create_wchar_utf8_table();

     for (coll_map_iter coll_it = coll_map_.begin();
          coll_it != coll_map_.end(); ++coll_it) {
         if (coll_it->second.offset != UINT_MAX
             || !collate_out_.undefined_optimization){

             std::string valid = utf8_encode (coll_it->first);
             valid = valid.substr (0, valid.size() - 1);

             while (valid.size() > 0){
                 valid_coll_wchar_set_.insert (valid);
                 valid = valid.substr(0, valid.size() - 1);
             }
         }
     }
 }