/***************************************************************************
 *
 * scanner.h
 *
 * $Id$
 *
 ***************************************************************************
 *
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed
 * with this work for additional information regarding copyright
 * ownership. The ASF licenses this file to you under the Apache
 * License, Version 2.0 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * permissions and limitations under the License.
 *
 * Copyright 2001-2006 Rogue Wave Software.
 *
 **************************************************************************/

#ifndef RWSTD_UTIL_SCANNER_H_INCLUDED
#define RWSTD_UTIL_SCANNER_H_INCLUDED

#include <string>
#include <stack>
#include <climits>   // for ULONG_MAX


// declared here only so that Scanner can hold pointers to it;
// the definition is not needed by this header
struct ScannerContext;


// Scanner: tokenizer interface for character map and locale
// definition files.
//
// This header only declares the class; no member function is
// defined here.
class Scanner
{
public:

    // enumeration of all tokens in the character map
    // and locale definition file
    //
    // NOTE: the enumerators have no explicit values, so each one's
    // numeric value is its position in this list; adding, removing,
    // or reordering entries changes the values of all that follow
    enum token_id {
        tok_code_set_name,        // <code_set_name>
        tok_mb_cur_max,           // <mb_cur_max>
        tok_mb_cur_min,           // <mb_cur_min>
        // sections
        tok_charmap,              // beginning of CHARMAP section
        tok_collate,              // beginning of LC_COLLATE section
        tok_ctype,                // beginning of LC_CTYPE section
        tok_messages,             // beginning of LC_MESSAGES section
        tok_monetary,             // beginning of LC_MONETARY section
        tok_numeric,              // beginning of LC_NUMERIC section
        tok_time,                 // beginning of LC_TIME section
        // ISO/IEC TR 14652 extensions:
        tok_addr,                 // beginning of LC_ADDRESS section
        tok_ident,                // beginning of LC_IDENTIFICATION section
        tok_measure,              // beginning of LC_MEASUREMENT section
        tok_name,                 // beginning of LC_NAME section
        tok_paper,                // beginning of LC_PAPER section
        tok_phone,                // beginning of LC_TELEPHONE section
        //
        tok_end,                  // END of a section
        // LC_CTYPE-specific tokens
        tok_upper,                // upper section of LC_CTYPE
        tok_lower,                // lower section of LC_CTYPE
        tok_digit,                // digit section of LC_CTYPE
        tok_space,                // space section of LC_CTYPE
        tok_alpha,                // alpha section of LC_CTYPE
        tok_graph,                // graph section of LC_CTYPE
        tok_print,                // print section of LC_CTYPE
        tok_cntrl,                // cntrl section of LC_CTYPE
        tok_punct,                // punct section of LC_CTYPE
        tok_xdigit,               // xdigit section of LC_CTYPE
        tok_blank,                // blank section of LC_CTYPE
        tok_tolower,              // tolower section of LC_CTYPE
        tok_toupper,              // toupper section of LC_CTYPE
        // LC_COLLATE-specific tokens
        tok_script,
        tok_coll_elem,            // collating-element
        tok_coll_sym,             // collating symbol
        tok_from,
        tok_xlit_start,           // translit_start
        tok_xlit_end,             // translit_end
        tok_reorder,
        tok_reorder_end,
        tok_reorder_section,
        tok_reorder_section_end,
        tok_order_start,
        tok_order_end,
        tok_forward,
        tok_backward,
        tok_position,
        tok_undefined,
        //
        tok_string,
        tok_ignore,
        // absolute, hexadecimal, decimal, and double-increment
        // ellipses (see ISO/IEC TR 14652)
        tok_abs_ellipsis,         // "..."
        tok_hex_ellipsis,         // ".."
        tok_dec_ellipsis,         // "...."
        tok_dbl_ellipsis,         // "..(N).."
        tok_width,
        // LC_MONETARY-specific tokens
        tok_int_curr_symbol,
        tok_currency_symbol,
        tok_mon_decimal_point,
        tok_mon_thousands_sep,
        tok_mon_grouping,
        tok_positive_sign,
        tok_negative_sign,
        tok_int_frac_digits,
        tok_frac_digits,
        tok_p_cs_precedes,
        tok_p_sep_by_space,
        tok_n_cs_precedes,
        tok_n_sep_by_space,
        tok_p_sign_posn,
        tok_n_sign_posn,
        tok_int_p_cs_precedes,
        tok_int_n_cs_precedes,
        tok_int_p_sep_by_space,
        tok_int_n_sep_by_space,
        tok_int_p_sign_posn,
        tok_int_n_sign_posn,
        // LC_NUMERIC-specific tokens
        tok_decimal_point,        // decimal point
        tok_thousands_sep,        // thousands_sep
        tok_grouping,             // grouping
        tok_truename,             // truename (C++ extension)
        tok_falsename,            // falsename (C++ extension)
        // LC_TIME-specific tokens
        tok_abday,
        tok_day,
        tok_abmon,
        tok_mon,
        tok_d_t_fmt,
        tok_d_fmt,
        tok_t_fmt,
        tok_am_pm,
        tok_t_fmt_ampm,
        tok_era,
        tok_era_d_fmt,
        tok_era_t_fmt,
        tok_era_d_t_fmt,
        tok_alt_digits,
        // LC_MESSAGES-specific tokens
        tok_yesexpr,
        tok_noexpr,
        // LC_ADDRESS-specific tokens
        // LC_IDENTIFICATION-specific tokens
        // LC_MEASUREMENT-specific tokens
        // LC_NAME-specific tokens
        // LC_PAPER-specific tokens
        // LC_TELEPHONE-specific tokens
        // other:
        tok_sym_name,             // symbolic character name
        tok_char_value,           // character value (octal, decimal, or hex)
        tok_comment,              // comment
        tok_comment_char,         // <comment_char>
        tok_escape_char,          // <escape_char>
        tok_copy,                 // copy directive
        tok_include,              // include directive
        tok_nl,                   // newline
        tok_ndef,                 // unknown/undefined token
        tok_end_tokens            // end of input
    };

    // scanner states (disabled)
    // enum {valid, invalid};

    // a structure that represents a token
    //
    // token_t declares no constructor: a default-initialized object
    // has an empty name but indeterminate token, line, column, and
    // file members until they are assigned
    struct token_t {
        std::string name;
        token_id    token;

        // file position
        int line;
        int column;

        // file name pointer
        // NOTE(review): who owns the pointed-to string and how long
        // it stays valid is not visible in this header
        const char* file;
    };

    // construction and destruction; the destructor is virtual so
    // that objects of derived classes can be deleted via Scanner*
    Scanner ();
    virtual ~Scanner();

    // public interface

    // returns the next token
    // NOTE(review): presumably yields tok_end_tokens once the input
    // is exhausted -- confirm against the definition
    token_t next_token ();

    // NOTE(review): presumably takes the name of the file to scan
    // followed by the comment character and the escape character
    // (see tok_comment_char and tok_escape_char); the defaults are
    // '#' and a single backslash -- confirm against the definition
    void open (std::string, char = '#', char = '\\');

    void close ();

    // NOTE(review): presumably the escape character currently in
    // effect -- confirm against the definition
    char escape_char () const;

    void ignore_line ();

    // converts an octal, decimal, or hexadecimal escape sequence
    // (or a multibyte sequence of such things) to a numeric value
    //
    // NOTE(review): the optional second argument looks like an
    // out-parameter for the end of the parsed text and the third
    // like a switch enabling multibyte sequences -- confirm against
    // the definition
    unsigned long
    convert_escape (const char*, const char** = 0, bool = false) const;

private:

    // copying is disabled: both members are declared private
    // and intentionally left undefined
    Scanner (const Scanner&);           // not defined
    void operator= (const Scanner&);    // not defined

    // helper function that identifies a token from a string and
    // returns the corresponding token_id
    token_id process_token (const char* name);

    // read a line from stream
    void read_line ();

    // current file context and stack of context objects
    ScannerContext*             context_;
    std::stack<ScannerContext*> context_stack_;

    unsigned nlines_;    // number of lines read
    unsigned ntokens_;   // number of tokens read

    // was the last token an escaped newline
    bool escaped_newline_;
};


#endif   // RWSTD_UTIL_SCANNER_H_INCLUDED