// blob: d742359901b49ae5880353bc04d572e71a8fc505 [file] [log] [blame]
/***************************************************************************
*
* scanner.h
*
* $Id$
*
***************************************************************************
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*
* Copyright 2001-2006 Rogue Wave Software.
*
**************************************************************************/
#ifndef RWSTD_UTIL_SCANNER_H_INCLUDED
#define RWSTD_UTIL_SCANNER_H_INCLUDED
#include <string>
#include <stack>
#include <climits> // for ULONG_MAX
// declared here, defined elsewhere; this header refers to it only
// through pointers
struct ScannerContext;


// Tokenizer interface for character map and locale definition files.
// Only declarations appear in this header; every member function is
// implemented outside of it.
class Scanner
{
public:

    // identifiers of all tokens that may appear in a character map
    // or a locale definition file
    // enumerators are numbered consecutively starting at zero, so
    // inserting or reordering any of them changes the values of
    // all those that follow
    enum token_id {
        tok_code_set_name = 0,     // <code_set_name>
        tok_mb_cur_max,            // <mb_cur_max>
        tok_mb_cur_min,            // <mb_cur_min>

        // tokens introducing a section
        tok_charmap,               // start of the CHARMAP section
        tok_collate,               // start of the LC_COLLATE section
        tok_ctype,                 // start of the LC_CTYPE section
        tok_messages,              // start of the LC_MESSAGES section
        tok_monetary,              // start of the LC_MONETARY section
        tok_numeric,               // start of the LC_NUMERIC section
        tok_time,                  // start of the LC_TIME section

        // sections added by ISO/IEC TR 14652
        tok_addr,                  // start of the LC_ADDRESS section
        tok_ident,                 // start of the LC_IDENTIFICATION section
        tok_measure,               // start of the LC_MEASUREMENT section
        tok_name,                  // start of the LC_NAME section
        tok_paper,                 // start of the LC_PAPER section
        tok_phone,                 // start of the LC_TELEPHONE section

        tok_end,                   // END of a section

        // tokens specific to LC_CTYPE
        tok_upper,                 // upper section of LC_CTYPE
        tok_lower,                 // lower section of LC_CTYPE
        tok_digit,                 // digit section of LC_CTYPE
        tok_space,                 // space section of LC_CTYPE
        tok_alpha,                 // alpha section of LC_CTYPE
        tok_graph,                 // graph section of LC_CTYPE
        tok_print,                 // print section of LC_CTYPE
        tok_cntrl,                 // cntrl section of LC_CTYPE
        tok_punct,                 // punct section of LC_CTYPE
        tok_xdigit,                // xdigit section of LC_CTYPE
        tok_blank,                 // blank section of LC_CTYPE
        tok_tolower,               // tolower section of LC_CTYPE
        tok_toupper,               // toupper section of LC_CTYPE

        // tokens specific to LC_COLLATE
        tok_script,
        tok_coll_elem,             // collating-element
        tok_coll_sym,              // collating symbol
        tok_from,
        tok_xlit_start,            // translit_start
        tok_xlit_end,              // translit_end
        tok_reorder,
        tok_reorder_end,
        tok_reorder_section,
        tok_reorder_section_end,
        tok_order_start,
        tok_order_end,
        tok_forward,
        tok_backward,
        tok_position,
        tok_undefined,

        tok_string,
        tok_ignore,

        // the absolute, hexadecimal, decimal, and double-increment
        // ellipses described in ISO/IEC TR 14652
        tok_abs_ellipsis,          // "..."
        tok_hex_ellipsis,          // ".."
        tok_dec_ellipsis,          // "...."
        tok_dbl_ellipsis,          // "..(N).."

        tok_width,

        // tokens specific to LC_MONETARY
        tok_int_curr_symbol,
        tok_currency_symbol,
        tok_mon_decimal_point,
        tok_mon_thousands_sep,
        tok_mon_grouping,
        tok_positive_sign,
        tok_negative_sign,
        tok_int_frac_digits,
        tok_frac_digits,
        tok_p_cs_precedes,
        tok_p_sep_by_space,
        tok_n_cs_precedes,
        tok_n_sep_by_space,
        tok_p_sign_posn,
        tok_n_sign_posn,
        tok_int_p_cs_precedes,
        tok_int_n_cs_precedes,
        tok_int_p_sep_by_space,
        tok_int_n_sep_by_space,
        tok_int_p_sign_posn,
        tok_int_n_sign_posn,

        // tokens specific to LC_NUMERIC
        tok_decimal_point,         // decimal point
        tok_thousands_sep,         // thousands_sep
        tok_grouping,              // grouping
        tok_truename,              // truename (C++ extension)
        tok_falsename,             // falsename (C++ extension)

        // tokens specific to LC_TIME
        tok_abday,
        tok_day,
        tok_abmon,
        tok_mon,
        tok_d_t_fmt,
        tok_d_fmt,
        tok_t_fmt,
        tok_am_pm,
        tok_t_fmt_ampm,
        tok_era,
        tok_era_d_fmt,
        tok_era_t_fmt,
        tok_era_d_t_fmt,
        tok_alt_digits,

        // tokens specific to LC_MESSAGES
        tok_yesexpr,
        tok_noexpr,

        // no tokens are currently defined for the keywords of the
        // LC_ADDRESS, LC_IDENTIFICATION, LC_MEASUREMENT, LC_NAME,
        // LC_PAPER, and LC_TELEPHONE sections

        // everything else
        tok_sym_name,              // symbolic character name
        tok_char_value,            // character value (octal, decimal, or hex)
        tok_comment,               // comment
        tok_comment_char,          // <comment_char>
        tok_escape_char,           // <escape_char>
        tok_copy,                  // copy directive
        tok_include,               // include directive
        tok_nl,                    // newline
        tok_ndef,                  // unknown/undefined token
        tok_end_tokens             // end of input
    };

    // disabled: scanner states
    // enum {valid, invalid};

    // describes a single token: its text, its kind, and where
    // it was found
    // kept a plain aggregate (no constructors), so the scalar
    // members of a default-initialized object are indeterminate
    struct token_t {
        std::string  name;
        token_id     token;

        // position within the file
        int          line;
        int          column;

        // pointer to the name of the file; token_t has no destructor
        // and so never releases what this points to
        const char*  file;
    };

    // construction and destruction
    Scanner ();

    virtual ~Scanner ();

    // public interface

    // returns the next token
    token_t next_token ();

    // NOTE(review): the parameters are unnamed in the original
    // declaration; the names used here are inferred from the default
    // arguments and from tok_comment_char and tok_escape_char --
    // confirm against the definition
    void open (std::string fname,
               char        comment_chr = '#',
               char        escape_chr  = '\\');

    void close ();

    char escape_char () const;

    void ignore_line ();

    // computes the numeric value of an octal, decimal, or hexadecimal
    // escape sequence (or of a multibyte sequence of such escapes)
    // NOTE(review): the roles of the two defaulted arguments are not
    // visible in this header -- see the definition
    unsigned long
    convert_escape (const char* seq, const char** = 0, bool = false) const;

private:

    // copying is disabled: both members are declared private
    // and intentionally left without a definition
    Scanner (const Scanner&);           // not defined
    void operator= (const Scanner&);    // not defined

    // helper that determines which token the string denotes
    // and returns its token_id
    token_id process_token (const char* name);

    // reads a line from the stream
    void read_line ();

    // context of the file currently being scanned, followed
    // by the stack of context objects
    ScannerContext*              context_;
    std::stack<ScannerContext*>  context_stack_;

    unsigned int  nlines_;      // count of lines read
    unsigned int  ntokens_;     // count of tokens read

    // true when the most recent token was an escaped newline
    bool          escaped_newline_;
};
#endif // RWSTD_UTIL_SCANNER_H_INCLUDED