| /*************************************************************************** |
| * |
| * def.h |
| * |
| * $Id$ |
| * |
| *************************************************************************** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed |
| * with this work for additional information regarding copyright |
| * ownership. The ASF licenses this file to you under the Apache |
| * License, Version 2.0 (the "License"); you may not use this file |
| * except in compliance with the License. You may obtain a copy of |
| * the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or |
| * implied. See the License for the specific language governing |
| * permissions and limitations under the License. |
| * |
| * Copyright 2001-2006 Rogue Wave Software. |
| * |
| **************************************************************************/ |
| |
| #ifndef RWSTD_UTIL_DEF_H_INCLUDED |
| #define RWSTD_UTIL_DEF_H_INCLUDED |
| |
| #include <list> |
| #include <locale> // for ctype_base |
| #include <map> |
| #include <string> |
| #include <vector> |
| |
| #include <cassert> // for assert() |
| #include <climits> // for UCHAR_MAX |
| #include <cstddef> // for size_t |
| |
| #include <loc/_localedef.h> |
| |
| #include "scanner.h" |
| #include "charmap.h" |
| |
| |
| class Def |
| { |
| public: |
| |
| // Def processes a locale definition file (see process_input) and writes |
| // the LC_* category files (see the write_* members); the constructor takes |
| // the name of the file that holds the locale definition, the name of the |
| // locale being created, the character map, and the no_position flag |
| Def(const char* filename, |
| const char* out_name, |
| Charmap& charmap, bool no_position); |
| |
| // free up all the dynamically allocated memory |
| ~Def (); |
| |
| // start point for processing the input files |
| void process_input (); |
| |
| // write the LC_CTYPE file to the specified directory |
| void write_ctype(std::string dir_name); |
| |
| void write_codecvt(std::string dir_name); |
| |
| // write the LC_NUMERIC file to the specified directory |
| void write_numeric(std::string dir_name); |
| |
| // write the LC_MONETARY file to the specified directory |
| void write_monetary(std::string dir_name); |
| |
| // write the LC_TIME file to the specified directory |
| void write_time(std::string dir_name); |
| |
| // write the LC_MESSAGES file to the specified directory |
| void write_messages(std::string dir_name); |
| |
| // write the LC_COLLATE file to the specified directory |
| void write_collate(std::string dir_name); |
| |
| // dump the collate information |
| void dump_collate (); |
| |
| // have warnings occurred |
| bool warnings_occurred_; |
| |
| // was the content of the locale definition file scanned ahead |
| bool scan_ahead_; |
| |
| typedef Scanner::token_t token_t; |
| typedef std::pair<token_t,token_t> token_pair_t; |
| typedef std::list<token_t> token_list_t; |
| typedef std::pair<token_t,token_list_t> collate_entry_t; |
| typedef std::pair<token_t,token_list_t> collate_elem_t; |
| typedef std::list<collate_entry_t> collate_entry_list_t; |
| struct collate_section_t; |
| |
| struct collate_section_t { |
| std::string name; |
| token_list_t order; |
| collate_entry_list_t entries; |
| }; |
| |
| private: |
| struct ce_info_t; |
| struct collate_info_t; |
| friend struct ce_info_t; |
| friend struct collate_info_t; |
| |
| // a struct used to represent the weights for each collating element |
| struct Weights_t { |
| unsigned char size; |
| unsigned int weight[256]; |
| }; |
| |
| ///////////////////////////////////////////////////////////////////// |
| // collate preprocessing information |
| |
| token_list_t script_list_; |
| token_list_t cs_list_; |
| token_list_t sym_list_; |
| std::list<collate_elem_t> ce_list_; |
| std::list<collate_section_t> section_list_; |
| |
| // preprocessing for collate section |
| void preprocess_collate (); |
| void preprocess_order (); |
| void preprocess_reorder (); |
| void preprocess_reorder_section (); |
| void preprocess_collation_definitions(); |
| |
| void process_collation_definition ( bool, collate_entry_t&, |
| unsigned int, unsigned int); |
| unsigned int process_order_stmt (collate_section_t&); |
| |
| bool insert_entries (token_t&, collate_entry_list_t&); |
| void remove_entry (collate_entry_t&); |
| void list_collate (); |
| |
| // automatically fill any ctype categories that depend upon characters |
| // being defined in other categories |
| void auto_fill (); |
| |
| // copy a category from one locale into the current locale |
| void copy_category(int cat, std::string name); |
| |
| // copy a file |
| void copy_file(const std::string &name, const std::string &outname); |
| |
| // process absolute ellipsis |
| std::size_t process_abs_ellipsis (const Scanner::token_t&, |
| std::ctype_base::mask); |
| |
| // process hexadecimal symbolic ellipsis, decimal symbolic ellipsis, |
| // and double increment hexadecimal symbolic ellipsis |
| std::size_t process_sym_ellipsis (const std::string&, |
| const std::string&, |
| Scanner::token_id, |
| std::ctype_base::mask); |
| |
| // parse the era string |
| void parse_era (const token_t&); |
| |
| // process the ctype category identified by the mask argument |
| // (e.g. std::ctype_base::upper); NOTE(review): this comment used to end in a dangling "with the exception of" -- confirm what, if anything, is excluded |
| void process_mask (std::ctype_base::mask, const char*); |
| |
| // process the ctype toupper and tolower definitions |
| void process_upper_lower(Scanner::token_id tok); |
| |
| // process the ctype section of the locale definition file |
| void process_ctype(); |
| |
| // process transliteration information |
| void process_xlit (); |
| |
| void process_xlit_statement (std::size_t&); |
| |
| // process the collate section of the locale definition file |
| void process_collate (); |
| |
| // processing of collating definition statements |
| void process_collate_definition (bool, collate_entry_t&, |
| unsigned int&, unsigned int); |
| |
| // helper function for process_collate() that processes the collation |
| // order of the collating elements |
| void process_order (collate_section_t&, unsigned int&); |
| |
| // helper function for process_order() that processes the sequence |
| // of weights for each collating element |
| void process_weights(collate_entry_t&); |
| |
| // get the next weight |
| bool get_weight (token_t&, Weights_t*, int); |
| |
| // add a symbolic name to the collation array |
| void add_to_coll (const wchar_t val, |
| const Weights_t* weight_template, |
| const unsigned int coll_value, |
| const std::vector<bool>& ordinal_weights, |
| bool undefined_value); |
| |
| // add missing values when the UNDEFINED keyword is found or at the |
| // end of the collation array if UNDEFINED is not found |
| void add_missing_values (const std::vector<bool> &ordinal_weights, |
| const Weights_t* weights_template, |
| unsigned int &coll_value, bool give_warning); |
| |
| // process the monetary section of the locale definition file |
| void process_monetary(); |
| |
| // create the monetary formats |
| void create_format (char [4], char, char, char, bool); |
| |
| // process the numeric section of the locale definition file |
| void process_numeric(); |
| |
| // extracts and converts an array of strings such as those |
| // representing the names of weekdays in the LC_TIME section |
| Scanner::token_t |
| extract_string_array (std::string*, std::wstring*, std::size_t); |
| |
| |
| // process the time section of the locale definition file |
| void process_time(); |
| |
| // process the messages section of the locale definition file |
| void process_messages(); |
| |
| std::string convert_string (const std::string&); |
| std::wstring convert_wstring (const token_t&); |
| std::wstring convert_wstring (const std::vector<std::string>&); |
| |
| void strip_pair(const std::string&, std::string&, std::string&); |
| |
| // encode a wchar_t into utf8 encoding |
| std::string utf8_encode (wchar_t ch); |
| |
| // convert a character to the encoding for this locale (takes a wchar_t; NOTE(review): this comment used to say "a utf8 encoded string" -- confirm against the definition) |
| std::string convert_to_ext (wchar_t val); |
| |
| bool get_n_val (const Scanner::token_t&, unsigned char &val); |
| bool get_w_val (const Scanner::token_t&, wchar_t &val); |
| |
| // initialize the coll_map with all the characters in the codeset |
| void init_coll_map(); |
| |
| void gen_n_to_w_coll_tables (const std::string &charp, |
| unsigned int tab_num); |
| |
| void gen_w_to_n_coll_tables (const std::string &charp, |
| unsigned int tab_num); |
| |
| // the next usable offset for collating elements greater than UCHAR_MAX |
| unsigned int next_offset_; |
| |
| Scanner::token_t next; |
| |
| // the name of the locale we are creating |
| std::string output_name_; |
| |
| // the charmap used to process the character map definition file |
| Charmap& charmap_; |
| |
| // the scanner used to process the locale definition file |
| Scanner scanner_; |
| |
| bool ctype_symlink_; |
| std::string ctype_filename_; |
| |
| |
| // maps characters to a mask value |
| std::map<wchar_t, unsigned int> mask_; |
| |
| // maps characters to their lower case representation |
| std::map<wchar_t, wchar_t> lower_; |
| |
| // maps characters to their upper case representation |
| std::map<wchar_t, wchar_t> upper_; |
| |
| typedef std::map<std::string, unsigned int>::iterator mb_char_off_map_iter; |
| |
| struct codecvt_offset_tab_t { |
| unsigned int off [UCHAR_MAX + 1]; |
| }; |
| |
| void create_wchar_utf8_table (); |
| std::map<std::string, std::string> wchar_utf8_to_ext_; |
| typedef std::map<std::string, std::string>::iterator wchar_utf8_iter; |
| |
| void gen_valid_coll_wchar_set (); |
| |
| std::set<std::string> valid_coll_wchar_set_; |
| typedef std::set<std::string>::iterator valid_coll_wchar_set_iter; |
| std::set<std::string> valid_codecvt_wchar_set_; |
| typedef std::set<std::string>::iterator valid_codecvt_wchar_set_iter; |
| |
| typedef std::map<unsigned, const codecvt_offset_tab_t*> |
| codecvt_offsets_map_t; |
| |
| // generates conversion tables of all valid multibyte characters |
| // from a multibyte character map populated from the character |
| // set description file |
| std::size_t |
| gen_mbchar_tables (codecvt_offsets_map_t&, |
| std::map<std::string, unsigned>&, |
| const std::string& = "", |
| unsigned = 0); |
| |
| std::size_t |
| gen_wchar_tables (codecvt_offsets_map_t&, |
| const std::string& = "", |
| unsigned = 0); |
| |
| std::size_t |
| gen_utf8_tables (codecvt_offsets_map_t&, |
| std::map<std::string, unsigned>&, |
| const std::string& = "", |
| unsigned = 0); |
| |
| std::set<std::string> valid_coll_mb_set_; |
| |
| void gen_valid_coll_mb_set(); |
| |
| // generation of transliteration tables |
| void gen_xlit_data (); |
| |
| // specifies if the locale file has already been written such as when |
| // the "copy" directive is used in a locale definition file |
| bool ctype_written_, codecvt_written_, collate_written_, time_written_, |
| num_written_, mon_written_, messages_written_; |
| bool ctype_def_found_, collate_def_found_, |
| time_def_found_, num_def_found_, mon_def_found_, messages_def_found_; |
| |
| // specifies if the keyword UNDEFINED is used in the LC_COLLATE definition |
| bool undefined_keyword_found_; |
| |
| // no_position_ is set by the "--no_position" command line option; |
| // when true, forward,position orders will be treated like forward orders |
| bool no_position_; |
| |
| // collate maps |
| |
| struct offset_tab_t { |
| int first_offset; |
| unsigned int off[UCHAR_MAX + 1]; |
| }; |
| |
| std::map<unsigned int, offset_tab_t> char_offs_; |
| typedef std::map<unsigned int, offset_tab_t>::iterator char_offs_iter; |
| |
| std::map<unsigned int, offset_tab_t> w_to_n_coll_; |
| typedef std::map<unsigned int, offset_tab_t>::iterator w_to_n_coll_iter; |
| |
| unsigned int next_tab_num_; |
| unsigned int next_wchar_coll_tab_num_; |
| |
| |
| struct ce_offset_tab_t { |
| int first_offset; |
| int last_offset; |
| unsigned int off[UCHAR_MAX + 1]; |
| }; |
| std::map<unsigned int, ce_offset_tab_t> n_ce_offs_; |
| typedef std::map<unsigned int, ce_offset_tab_t>::iterator n_ce_offs_iter; |
| |
| std::map<unsigned int, ce_offset_tab_t> w_ce_offs_; |
| typedef std::map<unsigned int, ce_offset_tab_t>::iterator w_ce_offs_iter; |
| |
| std::set<std::string> valid_n_ce_set; |
| typedef std::set<std::string>::iterator valid_n_ce_set_iter; |
| |
| void gen_n_ce_tables (const std::set<std::string>, |
| unsigned int, unsigned int); |
| unsigned int next_n_ce_tab_num_; |
| void gen_w_ce_tables (const std::set<std::string>, |
| unsigned int, unsigned int); |
| unsigned int next_w_ce_tab_num_; |
| |
| std::map<std::string, std::string>ce_sym_map_; |
| std::map<std::string, std::string>ce_wsym_map_; |
| typedef std::map<std::string, std::string>::iterator ce_sym_map_iter; |
| // off_mapr_ maps an offset value to the symbol name or collating element |
| std::map<unsigned int, std::string> off_mapr_; |
| |
| // cs_map_ maps a collating symbol name to a collation value |
| std::map<std::string, unsigned int> cs_map_; |
| typedef std::map<std::string, unsigned int>::iterator cs_map_iter; |
| |
| // transliteration information |
| struct xlit_offset_table { |
| unsigned int offset_table [UCHAR_MAX + 1]; |
| }; |
| typedef struct xlit_offset_table xlit_offset_table_t; |
| typedef std::map<wchar_t,std::list<std::string> > xlit_map_t; |
| typedef std::map<wchar_t, unsigned int> xlit_data_offset_map_t; |
| typedef std::map<unsigned int,xlit_offset_table_t> xlit_table_map_t; |
| xlit_map_t xlit_map_; |
| xlit_data_offset_map_t xlit_data_offset_map_; |
| xlit_table_map_t xlit_table_map_; |
| |
| // the collate_info_t struct contains information concerning the collation |
| // of each character |
| struct collate_info_t{ |
| unsigned int offset; |
| unsigned int coll_val; |
| unsigned int order; |
| Weights_t *weights; |
| }; |
| |
| // we need one collate_info_t to hold information about the undefined |
| // characters. All the other characters have collate_info_ts that are |
| // located in the coll_map_. |
| collate_info_t undef_char_info_; |
| |
| // the ce_info_t struct contains information concerning the collation |
| // of a collating element. |
| |
| struct ce_info_t { |
| unsigned int offset; |
| unsigned int coll_val; |
| unsigned int order; |
| Weights_t *weights; |
| std::wstring ce_wstr; |
| }; |
| |
| // The coll_map_ contains a mapping from the wide char value to the |
| // collation information about that value. |
| std::map<wchar_t, collate_info_t> coll_map_; |
| typedef std::map<wchar_t, collate_info_t>::iterator coll_map_iter; |
| |
| // the ce_map_ contains a mapping from the symbolic collating element |
| // name to the collation information about that element |
| std::map <std::string, ce_info_t> ce_map_; |
| typedef std::map <std::string, ce_info_t>::iterator ce_map_iter; |
| |
| // iterator type definitions for the maps |
| typedef std::map<wchar_t, unsigned int>::iterator mask_iter; |
| typedef std::map<wchar_t, wchar_t>::iterator upper_iter; |
| typedef std::map<wchar_t, wchar_t>::iterator lower_iter; |
| typedef std::map< std::string, unsigned char >::const_iterator n_cmap_iter; |
| typedef std::map<std::string, wchar_t>::const_iterator mb_cmap_iter; |
| typedef std::map<wchar_t, std::string>::const_iterator rmb_cmap_iter; |
| typedef std::map<std::string, wchar_t >::const_iterator w_cmap_iter; |
| typedef std::map<wchar_t, std::string >::const_iterator rw_cmap_iter; |
| typedef std::map<unsigned int, std::string>::iterator off_mapr_iter; |
| typedef std::map<std::string, wchar_t>::const_iterator ucs4_cmap_iter; |
| |
| typedef std::list<std::string>::const_iterator symnames_list_iter; |
| |
| // the structures used to hold the offsets for each locale category |
| // and any non-pointer locale information |
| _RW::__rw_punct_t num_punct_out_; |
| _RW::__rw_ctype_t ctype_out_; |
| _RW::__rw_time_t time_out_; |
| _RW::__rw_collate_t collate_out_; |
| _RW::__rw_mon_t mon_out_; |
| _RW::__rw_num_t num_out_; |
| _RW::__rw_messages_t messages_out_; |
| |
| // structures used for internally holding locale information |
| // LC_CTYPE structures |
| |
| |
| struct ctype_t { |
| struct mask_elm { |
| wchar_t ch; // the wide character value |
| unsigned int mask; // the mask for that character |
| }; |
| struct upper_elm { |
| wchar_t lower; // the lower case wide character |
| wchar_t upper; // the upper case wide character |
| }; |
| struct lower_elm { |
| wchar_t upper; // the upper case wide character |
| wchar_t lower; // the lower case wide character |
| }; |
| |
| char max_mb_s; // the max number of bytes in a char |
| upper_elm* wtoupper_tab; // the wide char to_upper table |
| lower_elm* wtolower_tab; // the wide char to_lower table |
| mask_elm* wmask_tab; // the wide char mask_table |
| |
| }; |
| |
| |
| struct era_st { |
| std::string name; |
| std::string fmt; |
| std::wstring wname; |
| std::wstring wfmt; |
| _RW::__rw_time_t::era_t era_out; |
| }; |
| |
| std::list<era_st> era_list_; |
| typedef std::list<era_st>::iterator era_list_iter; |
| |
| // LC_COLLATE structure |
| struct collate_t { |
| } ; |
| |
| |
| |
| // LC_MONETARY structure |
| struct mon_t { |
| std::string int_curr_symbol; // narrow char* int_curr_symbol |
| std::string currency_symbol; // narrow char* currency_symbol |
| std::string mon_decimal_point; // narrow char* mon_decimal_point |
| std::string mon_thousands_sep; // narrow char* mon_thousands_sep |
| std::string mon_grouping; // narrow char* mon_grouping |
| std::string positive_sign; // narrow char* positive_sign |
| std::string negative_sign; // narrow char* negative_sign |
| std::wstring wint_curr_symbol; // wide wchar_t* int_curr_symbol |
| std::wstring wcurrency_symbol; // wide wchar_t* currency_symbol |
| std::wstring wmon_decimal_point; // wide wchar_t* mon_decimal_point |
| std::wstring wmon_thousands_sep; // wide wchar_t* mon_thousands_sep |
| std::wstring wpositive_sign; // wide wchar_t* positive_sign |
| std::wstring wnegative_sign; // wide wchar_t* negative_sign |
| }; |
| |
| |
| |
| // LC_NUMERIC structure |
| struct num_t { |
| std::string decimal_point; // narrow char* decimal_point |
| std::string thousands_sep; // narrow char* thousands_sep |
| std::string grouping; // narrow char* grouping |
| std::string truename; // narrow char* truename |
| std::string falsename; // narrow char* falsename |
| |
| std::wstring wdecimal_point; // wide wchar_t* decimal_point |
| std::wstring wthousands_sep; // wide wchar_t* thousands_sep |
| std::wstring wtruename; // wide wchar_t* truename |
| std::wstring wfalsename; // wide wchar_t* falsename |
| }; |
| |
| // list to hold the alternate digits |
| struct alt_digit_t { |
| std::string n_alt_digit; |
| std::wstring w_alt_digit; |
| unsigned int n_offset; |
| unsigned int w_offset; |
| }; |
| std::list<alt_digit_t> alt_digits_; |
| typedef std::list<alt_digit_t>::iterator alt_digits_iter; |
| |
| // LC_TIME structure |
| struct time_t { |
| std::string abday[7]; // narrow array of abbreviated days |
| std::string day[7]; // narrow array of days |
| std::string abmon[12]; // narrow array of abbreviated months |
| std::string mon[12]; // narrow array of months |
| std::string am_pm[2]; // narrow array of am/pm specifiers |
| std::string d_t_fmt; // narrow date and time format string |
| std::string d_fmt; // narrow date format string |
| std::string t_fmt; // narrow time format string |
| std::string t_fmt_ampm; // narrow time format string with am/pm |
| std::string era_d_t_fmt; // narrow era date and time format string |
| std::string era_d_fmt; // narrow era date format string |
| std::string era_t_fmt; // narrow era time format string |
| |
| |
| |
| std::wstring wabday[7]; // wide array of abbreviated days |
| std::wstring wday[7]; // wide array of days |
| std::wstring wabmon[12]; // wide array of abbreviated months |
| std::wstring wmon[12]; // wide array of months |
| std::wstring wam_pm[2]; // wide array of am/pm specifiers |
| std::wstring wd_t_fmt; // wide date and time format string |
| std::wstring wd_fmt; // wide date format string |
| std::wstring wt_fmt; // wide time format string |
| std::wstring wt_fmt_ampm; // wide time format string with am/pm |
| std::wstring wera_d_t_fmt; // wide era date and time format string |
| std::wstring wera_d_fmt; // wide era date format string |
| std::wstring wera_t_fmt; // wide era time format string |
| } ; |
| |
| // LC_MESSAGES structure |
| struct messages_t { |
| std::string yesexpr; |
| std::string noexpr; |
| |
| std::wstring wyesexpr; |
| std::wstring wnoexpr; |
| }; |
| |
| messages_t messages_st_; |
| time_t time_st_; |
| ctype_t ctype_st_; |
| mon_t mon_st_; |
| num_t num_st_; |
| collate_t collate_st_; |
| |
| |
| }; |
| |
| |
// Strips the enclosing double quotes from a quoted string.
//
// str is expected to be non-empty and to begin with a double quote; both
// preconditions are checked with assert().  Returns the characters between
// the opening quote and the last double quote in str, dropping anything
// that follows the last quote.  When str has no closing quote, everything
// after the opening quote is returned.
inline std::string strip_quotes (const std::string& str)
{
    assert (0 != str.size ());
    assert (str [0] == '\"');

    // with assertions compiled out, an empty argument would otherwise
    // make the substring extraction below throw std::out_of_range
    if (str.empty ())
        return std::string ();

    // position of the last quote; this is 0 when the opening quote is
    // the only quote in str (and npos when there is no quote at all)
    const std::string::size_type last = str.rfind ('\"');

    // no closing quote: keep the remainder of the string, spelled out
    // explicitly instead of relying on (0 - 1) wrapping around to npos
    if (0 == last || std::string::npos == last)
        return str.substr (1);

    // the characters strictly between index 0 and the closing quote
    return str.substr (1, last - 1);
}
| |
| |
| #endif // RWSTD_UTIL_DEF_H_INCLUDED |