// cpp/third_party/antlr4-cpp-runtime-4/runtime/src/Lexer.h - tsfile - Git at Google

 /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
  * Use of this file is governed by the BSD 3-clause license that
  * can be found in the LICENSE.txt file in the project root.
  */

 #pragma once

 #include "Recognizer.h"
 #include "TokenSource.h"
 #include "CharStream.h"
 #include "Token.h"

 namespace antlr4 {

   /// A lexer is a recognizer that draws input symbols from a character stream.
   /// Lexer grammars result in a subclass of this object. A Lexer object
   /// uses simplified match() and error recovery mechanisms in the interest
   /// of speed.
   ///
   /// The class is abstract: getChannelNames() and getModeNames() are pure
   /// virtual and have to be provided by the subclass.
   class ANTLR4CPP_PUBLIC Lexer : public Recognizer, public TokenSource {
   public:
     // Named constants. Both preprocessor branches declare the same names with
     // the same values; only the declaration form differs (static constexpr
     // members when compiling as C++17 or later, an unnamed size_t enum otherwise).
 #if __cplusplus >= 201703L
     /// Mode constant with value 0.
     static constexpr size_t DEFAULT_MODE = 0;
     /// Sentinel values taken from the top of the size_t range (max - 1 and
     /// max - 2). Presumably stored in `type` by more() and skip() -- confirm
     /// in the implementation file.
     static constexpr size_t MORE = std::numeric_limits<size_t>::max() - 1;
     static constexpr size_t SKIP = std::numeric_limits<size_t>::max() - 2;

     /// Channel constants; these simply re-export the values defined on Token.
     static constexpr size_t DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL;
     static constexpr size_t HIDDEN = Token::HIDDEN_CHANNEL;
     /// Inclusive bounds for character values; 0x10FFFF is the highest Unicode
     /// code point.
     static constexpr size_t MIN_CHAR_VALUE = 0;
     static constexpr size_t MAX_CHAR_VALUE = 0x10FFFF;
 #else
     // Fallback declaration form. static_cast<size_t>(-2) and (-3) wrap around
     // to the same values as max() - 1 and max() - 2 in the branch above.
     enum : size_t {
       DEFAULT_MODE = 0,
       MORE = static_cast<size_t>(-2), // std::numeric_limits<size_t>::max() - 1; doesn't work in VS 2013
       SKIP = static_cast<size_t>(-3), // std::numeric_limits<size_t>::max() - 2; doesn't work in VS 2013

       DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL,
       HIDDEN = Token::HIDDEN_CHANNEL,
       MIN_CHAR_VALUE = 0,
       MAX_CHAR_VALUE = 0x10FFFF,
     };
 #endif

     /// The character stream this lexer works on. Note that this member is public.
     CharStream *_input; // Pure reference, usually from statically allocated instance.

   protected:
     /// How to create token objects.
     /// Stored as a raw pointer; NOTE(review): presumably not owned by the
     /// lexer -- confirm against the implementation.
     TokenFactory<CommonToken> *_factory;

   public:
     /// The goal of all lexer rules/methods is to create a token object.
     ///  This is an instance variable as multiple rules may collaborate to
     ///  create a single token.  nextToken will return this object after
     ///  matching lexer rule(s).  If you subclass to allow multiple token
     ///  emissions, then set this to the last token to be matched or
     ///  something nonnull so that the auto token emit mechanism will not
     ///  emit another token.

     // Life cycle of a token is this:
     // Created by emit() (via the token factory) or by action code, holding ownership of it.
     // Ownership is handed over to the token stream when calling nextToken().
     std::unique_ptr<Token> token;

     /// What character index in the stream did the current token start at?
     /// Needed, for example, to get the text for the current token. Set at
     /// the start of nextToken.
     size_t tokenStartCharIndex;

     /// The line on which the first character of the token resides.
     size_t tokenStartLine;

     /// The character position of first character within the line.
     size_t tokenStartCharPositionInLine;

     /// Once we see EOF on char stream, next token will be EOF.
     /// If you have DONE : EOF ; then you see DONE EOF.
     bool hitEOF;

     /// The channel number for the current token.
     size_t channel;

     /// The token type for the current token.
     size_t type;

     /// Stack of lexer modes. Use the vector as a stack.
     std::vector<size_t> modeStack;
     /// The current lexer mode (see DEFAULT_MODE for the value 0).
     size_t mode;

     /// Constructs a lexer without passing an input stream.
     Lexer();
     /// Constructs a lexer for the given character stream.
     /// NOTE(review): this single-argument constructor is not marked `explicit`.
     Lexer(CharStream *input);
     /// Virtual so that subclasses can be destroyed through a Lexer pointer;
     /// the body is empty.
     virtual ~Lexer() {}

     /// Resets the lexer. NOTE(review): which members are reset is decided in
     /// the implementation file and cannot be seen from this header.
     virtual void reset();

     /// Return a token from this source; i.e., match a token on the char stream.
     /// The token is returned as a std::unique_ptr, so the caller receives ownership.
     virtual std::unique_ptr<Token> nextToken() override;

     /// Instruct the lexer to skip creating a token for current lexer rule
     /// and look for another token.  nextToken() knows to keep looking when
     /// a lexer rule finishes with token set to SKIP_TOKEN.  Recall that
     /// if token == null at end of any token rule, it creates one for you
     /// and emits it.
     /// NOTE(review): no SKIP_TOKEN is declared in this header; the sentinel
     /// visible here is SKIP -- the wording above looks stale, confirm.
     virtual void skip();
     /// Counterpart to skip(); presumably related to the MORE sentinel -- confirm.
     virtual void more();
     /// Mode handling. `mode` is the current mode and `modeStack` is used as a
     /// stack. Presumably pushMode() saves the current mode before switching to
     /// `m` and popMode() restores and returns the saved one -- confirm in the
     /// implementation file.
     virtual void setMode(size_t m);
     virtual void pushMode(size_t m);
     virtual size_t popMode();

     /// Stores `factory` in _factory. Because _factory is declared as
     /// TokenFactory<CommonToken>*, this only compiles for a T1 where
     /// TokenFactory<T1>* converts implicitly to that pointer type.
     template<typename T1>
     void setTokenFactory(TokenFactory<T1> *factory)  {
       this->_factory = factory;
     }

     /// Returns the token factory (see _factory).
     virtual TokenFactory<CommonToken>* getTokenFactory() override;

     /// Set the char stream and reset the lexer.
     /// The parameter is typed as IntStream* while _input is a CharStream*;
     /// NOTE(review): presumably the argument has to be a CharStream -- confirm.
     virtual void setInputStream(IntStream *input) override;

     /// Returns the name of the input source as a string.
     virtual std::string getSourceName() override;

     /// Returns the character stream (see _input).
     virtual CharStream* getInputStream() override;

     /// By default does not support multiple emits per nextToken invocation
     /// for efficiency reasons. Subclasses can override this method, nextToken,
     /// and getToken (to push tokens into a list and pull from that list
     /// rather than a single variable as this implementation does).
     /// The token is passed by value as a std::unique_ptr, i.e. ownership is
     /// transferred into the call.
     virtual void emit(std::unique_ptr<Token> newToken);

     /// The standard method called to automatically emit a token at the
     /// outermost lexical rule.  The token object should point into the
     /// char buffer start..stop.  If there is a text override in 'text',
     /// use that to set the token's text.  Override this method to emit
     /// custom Token objects or provide a new factory.
     /// NOTE(review): returns a raw Token*; presumably a non-owning pointer to
     /// the token now held in `token` -- confirm.
     virtual Token* emit();

     /// Emits the EOF token. Returns a raw Token*, like emit().
     virtual Token* emitEOF();

     /// Line / column accessors. getLine() is const while
     /// getCharPositionInLine() is not; both are overrides, so the signatures
     /// are fixed by the base class declarations.
     virtual size_t getLine() const override;

     virtual size_t getCharPositionInLine() override;

     virtual void setLine(size_t line);

     virtual void setCharPositionInLine(size_t charPositionInLine);

     /// What is the index of the current character of lookahead?
     virtual size_t getCharIndex();

     /// Return the text matched so far for the current token or any
     /// text override.
     virtual std::string getText();

     /// Set the complete text of this token; it wipes any previous
     /// changes to the text.
     virtual void setText(const std::string &text);

     /// Override if emitting multiple tokens.
     /// Returns a std::unique_ptr, so the caller receives ownership of the result.
     virtual std::unique_ptr<Token> getToken();

     /// Takes ownership of `newToken` (passed by value as a std::unique_ptr).
     virtual void setToken(std::unique_ptr<Token> newToken);

     /// Accessors for the current token type (see `type`).
     virtual void setType(size_t ttype);

     virtual size_t getType();

     /// Accessors for the current token channel (see `channel`).
     virtual void setChannel(size_t newChannel);

     virtual size_t getChannel();

     /// Pure virtual: the subclass supplies the list of channel names.
     virtual const std::vector<std::string>& getChannelNames() const = 0;

     /// Pure virtual: the subclass supplies the list of mode names.
     virtual const std::vector<std::string>& getModeNames() const = 0;

     /// Return a list of all Token objects in input char stream.
     /// Forces load of all tokens. Does not include EOF token.
     virtual std::vector<std::unique_ptr<Token>> getAllTokens();

     /// Recovery hook for a LexerNoViableAltException; see also the
     /// RecognitionException* overload below.
     virtual void recover(const LexerNoViableAltException &e);

     /// Reports the given LexerNoViableAltException to the listeners.
     virtual void notifyListeners(const LexerNoViableAltException &e);

     /// Returns a display form of `s` for use in error output.
     /// NOTE(review): the exact transformation is not visible in this header.
     virtual std::string getErrorDisplay(const std::string &s);

     /// Lexers can normally match any char in its vocabulary after matching
     /// a token, so do the easy thing and just kill a character and hope
     /// it all works out.  You can instead use the rule invocation stack
     /// to do sophisticated error recovery if you are in a fragment rule.
     virtual void recover(RecognitionException *re);

     /// Gets the number of syntax errors reported. According to the original
     /// comment this value is incremented each time notifyErrorListeners is
     /// called.
     /// NOTE(review): the original text spoke of "parsing" and referenced
     /// #notifyErrorListeners; the method declared in this class is
     /// notifyListeners -- confirm which one increments the counter.
     virtual size_t getNumberOfSyntaxErrors();

   protected:
     /// You can set the text for the current token to override what is in
     /// the input char buffer (via setText()).
     std::string _text;

   private:
     /// Syntax error counter; presumably the value behind getNumberOfSyntaxErrors().
     size_t _syntaxErrors;
     /// None of the data members above has an in-class initializer; presumably
     /// this helper assigns their initial values -- confirm in the implementation.
     void InitializeInstanceFields();
   };

 } // namespace antlr4
	/* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
	* Use of this file is governed by the BSD 3-clause license that
	* can be found in the LICENSE.txt file in the project root.
	*/

	#pragma once

	#include "Recognizer.h"
	#include "TokenSource.h"
	#include "CharStream.h"
	#include "Token.h"

	namespace antlr4 {

	/// A lexer is a recognizer that draws input symbols from a character stream.
	/// Lexer grammars result in a subclass of this object. A Lexer object
	/// uses simplified match() and error recovery mechanisms in the interest
	/// of speed.
	///
	/// The class is abstract: getChannelNames() and getModeNames() are pure
	/// virtual and have to be provided by the subclass.
	///
	/// NOTE(review): this is a second copy of the Lexer declaration that already
	/// appears earlier in this file. Within one translation unit the duplicate
	/// would be a class redefinition; it looks like an extraction artifact --
	/// confirm and keep only one copy.
	class ANTLR4CPP_PUBLIC Lexer : public Recognizer, public TokenSource {
	public:
	// Named constants. Both preprocessor branches declare the same names with
	// the same values; only the declaration form differs (static constexpr
	// members when compiling as C++17 or later, an unnamed size_t enum otherwise).
	#if __cplusplus >= 201703L
	/// Mode constant with value 0.
	static constexpr size_t DEFAULT_MODE = 0;
	/// Sentinel values taken from the top of the size_t range (max - 1 and
	/// max - 2). Presumably stored in `type` by more() and skip() -- confirm
	/// in the implementation file.
	static constexpr size_t MORE = std::numeric_limits<size_t>::max() - 1;
	static constexpr size_t SKIP = std::numeric_limits<size_t>::max() - 2;

	/// Channel constants; these simply re-export the values defined on Token.
	static constexpr size_t DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL;
	static constexpr size_t HIDDEN = Token::HIDDEN_CHANNEL;
	/// Inclusive bounds for character values; 0x10FFFF is the highest Unicode
	/// code point.
	static constexpr size_t MIN_CHAR_VALUE = 0;
	static constexpr size_t MAX_CHAR_VALUE = 0x10FFFF;
	#else
	// Fallback declaration form. static_cast<size_t>(-2) and (-3) wrap around
	// to the same values as max() - 1 and max() - 2 in the branch above.
	enum : size_t {
	DEFAULT_MODE = 0,
	MORE = static_cast<size_t>(-2), // std::numeric_limits<size_t>::max() - 1; doesn't work in VS 2013
	SKIP = static_cast<size_t>(-3), // std::numeric_limits<size_t>::max() - 2; doesn't work in VS 2013

	DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL,
	HIDDEN = Token::HIDDEN_CHANNEL,
	MIN_CHAR_VALUE = 0,
	MAX_CHAR_VALUE = 0x10FFFF,
	};
	#endif

	/// The character stream this lexer works on. Note that this member is public.
	CharStream *_input; // Pure reference, usually from statically allocated instance.

	protected:
	/// How to create token objects.
	/// Stored as a raw pointer; NOTE(review): presumably not owned by the
	/// lexer -- confirm against the implementation.
	TokenFactory<CommonToken> *_factory;

	public:
	/// The goal of all lexer rules/methods is to create a token object.
	/// This is an instance variable as multiple rules may collaborate to
	/// create a single token. nextToken will return this object after
	/// matching lexer rule(s). If you subclass to allow multiple token
	/// emissions, then set this to the last token to be matched or
	/// something nonnull so that the auto token emit mechanism will not
	/// emit another token.

	// Life cycle of a token is this:
	// Created by emit() (via the token factory) or by action code, holding ownership of it.
	// Ownership is handed over to the token stream when calling nextToken().
	std::unique_ptr<Token> token;

	/// What character index in the stream did the current token start at?
	/// Needed, for example, to get the text for the current token. Set at
	/// the start of nextToken.
	size_t tokenStartCharIndex;

	/// The line on which the first character of the token resides.
	size_t tokenStartLine;

	/// The character position of first character within the line.
	size_t tokenStartCharPositionInLine;

	/// Once we see EOF on char stream, next token will be EOF.
	/// If you have DONE : EOF ; then you see DONE EOF.
	bool hitEOF;

	/// The channel number for the current token.
	size_t channel;

	/// The token type for the current token.
	size_t type;

	/// Stack of lexer modes. Use the vector as a stack.
	std::vector<size_t> modeStack;
	/// The current lexer mode (see DEFAULT_MODE for the value 0).
	size_t mode;

	/// Constructs a lexer without passing an input stream.
	Lexer();
	/// Constructs a lexer for the given character stream.
	/// NOTE(review): this single-argument constructor is not marked `explicit`.
	Lexer(CharStream *input);
	/// Virtual so that subclasses can be destroyed through a Lexer pointer;
	/// the body is empty.
	virtual ~Lexer() {}

	/// Resets the lexer. NOTE(review): which members are reset is decided in
	/// the implementation file and cannot be seen from this header.
	virtual void reset();

	/// Return a token from this source; i.e., match a token on the char stream.
	/// The token is returned as a std::unique_ptr, so the caller receives ownership.
	virtual std::unique_ptr<Token> nextToken() override;

	/// Instruct the lexer to skip creating a token for current lexer rule
	/// and look for another token. nextToken() knows to keep looking when
	/// a lexer rule finishes with token set to SKIP_TOKEN. Recall that
	/// if token == null at end of any token rule, it creates one for you
	/// and emits it.
	/// NOTE(review): no SKIP_TOKEN is declared in this header; the sentinel
	/// visible here is SKIP -- the wording above looks stale, confirm.
	virtual void skip();
	/// Counterpart to skip(); presumably related to the MORE sentinel -- confirm.
	virtual void more();
	/// Mode handling. `mode` is the current mode and `modeStack` is used as a
	/// stack. Presumably pushMode() saves the current mode before switching to
	/// `m` and popMode() restores and returns the saved one -- confirm in the
	/// implementation file.
	virtual void setMode(size_t m);
	virtual void pushMode(size_t m);
	virtual size_t popMode();

	/// Stores `factory` in _factory. Because _factory is declared as
	/// TokenFactory<CommonToken>*, this only compiles for a T1 where
	/// TokenFactory<T1>* converts implicitly to that pointer type.
	template<typename T1>
	void setTokenFactory(TokenFactory<T1> *factory) {
	this->_factory = factory;
	}

	/// Returns the token factory (see _factory).
	virtual TokenFactory<CommonToken>* getTokenFactory() override;

	/// Set the char stream and reset the lexer.
	/// The parameter is typed as IntStream* while _input is a CharStream*;
	/// NOTE(review): presumably the argument has to be a CharStream -- confirm.
	virtual void setInputStream(IntStream *input) override;

	/// Returns the name of the input source as a string.
	virtual std::string getSourceName() override;

	/// Returns the character stream (see _input).
	virtual CharStream* getInputStream() override;

	/// By default does not support multiple emits per nextToken invocation
	/// for efficiency reasons. Subclasses can override this method, nextToken,
	/// and getToken (to push tokens into a list and pull from that list
	/// rather than a single variable as this implementation does).
	/// The token is passed by value as a std::unique_ptr, i.e. ownership is
	/// transferred into the call.
	virtual void emit(std::unique_ptr<Token> newToken);

	/// The standard method called to automatically emit a token at the
	/// outermost lexical rule. The token object should point into the
	/// char buffer start..stop. If there is a text override in 'text',
	/// use that to set the token's text. Override this method to emit
	/// custom Token objects or provide a new factory.
	/// NOTE(review): returns a raw Token*; presumably a non-owning pointer to
	/// the token now held in `token` -- confirm.
	virtual Token* emit();

	/// Emits the EOF token. Returns a raw Token*, like emit().
	virtual Token* emitEOF();

	/// Line / column accessors. getLine() is const while
	/// getCharPositionInLine() is not; both are overrides, so the signatures
	/// are fixed by the base class declarations.
	virtual size_t getLine() const override;

	virtual size_t getCharPositionInLine() override;

	virtual void setLine(size_t line);

	virtual void setCharPositionInLine(size_t charPositionInLine);

	/// What is the index of the current character of lookahead?
	virtual size_t getCharIndex();

	/// Return the text matched so far for the current token or any
	/// text override.
	virtual std::string getText();

	/// Set the complete text of this token; it wipes any previous
	/// changes to the text.
	virtual void setText(const std::string &text);

	/// Override if emitting multiple tokens.
	/// Returns a std::unique_ptr, so the caller receives ownership of the result.
	virtual std::unique_ptr<Token> getToken();

	/// Takes ownership of `newToken` (passed by value as a std::unique_ptr).
	virtual void setToken(std::unique_ptr<Token> newToken);

	/// Accessors for the current token type (see `type`).
	virtual void setType(size_t ttype);

	virtual size_t getType();

	/// Accessors for the current token channel (see `channel`).
	virtual void setChannel(size_t newChannel);

	virtual size_t getChannel();

	/// Pure virtual: the subclass supplies the list of channel names.
	virtual const std::vector<std::string>& getChannelNames() const = 0;

	/// Pure virtual: the subclass supplies the list of mode names.
	virtual const std::vector<std::string>& getModeNames() const = 0;

	/// Return a list of all Token objects in input char stream.
	/// Forces load of all tokens. Does not include EOF token.
	virtual std::vector<std::unique_ptr<Token>> getAllTokens();

	/// Recovery hook for a LexerNoViableAltException; see also the
	/// RecognitionException* overload below.
	virtual void recover(const LexerNoViableAltException &e);

	/// Reports the given LexerNoViableAltException to the listeners.
	virtual void notifyListeners(const LexerNoViableAltException &e);

	/// Returns a display form of `s` for use in error output.
	/// NOTE(review): the exact transformation is not visible in this header.
	virtual std::string getErrorDisplay(const std::string &s);

	/// Lexers can normally match any char in its vocabulary after matching
	/// a token, so do the easy thing and just kill a character and hope
	/// it all works out. You can instead use the rule invocation stack
	/// to do sophisticated error recovery if you are in a fragment rule.
	virtual void recover(RecognitionException *re);

	/// Gets the number of syntax errors reported. According to the original
	/// comment this value is incremented each time notifyErrorListeners is
	/// called.
	/// NOTE(review): the original text spoke of "parsing" and referenced
	/// #notifyErrorListeners; the method declared in this class is
	/// notifyListeners -- confirm which one increments the counter.
	virtual size_t getNumberOfSyntaxErrors();

	protected:
	/// You can set the text for the current token to override what is in
	/// the input char buffer (via setText()).
	std::string _text;

	private:
	/// Syntax error counter; presumably the value behind getNumberOfSyntaxErrors().
	size_t _syntaxErrors;
	/// None of the data members above has an in-class initializer; presumably
	/// this helper assigns their initial values -- confirm in the implementation.
	void InitializeInstanceFields();
	};

	} // namespace antlr4