| /* |
| * [The "BSD license"] |
| * Copyright (c) 2012-2015 Terence Parr |
| * Copyright (c) 2012-2015 Sam Harwell |
| * Copyright (c) 2015 Gerald Rosenberg |
| * All rights reserved. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * 3. The name of the author may not be used to endorse or promote products |
| * derived from this software without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR |
| * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
| * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
| * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, |
| * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
| * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
| * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
| * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
| * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
| * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| /** |
| * A grammar for ANTLR v4 implemented using v4 syntax |
| * |
| * Modified 2015.06.16 gbr |
| * -- update for compatibility with Antlr v4.5 |
| */ |
| |
| // ====================================================== |
| // Lexer specification |
| // ====================================================== |
| |
| lexer grammar ANTLRv4Lexer; |
| |
| // The generated lexer extends LexerAdaptor. The this.handle*() calls in the |
| // rule actions of this grammar are presumably implemented there -- confirm. |
| options { superClass = LexerAdaptor; } |
| // Standard set of fragments |
| import LexBasic; |
| |
| // Token types declared up front. TOKEN_REF and RULE_REF have no rule in the |
| // visible part of this file (presumably assigned by the adaptor -- confirm); |
| // LEXER_CHAR_SET is also produced by the rule of the same name in mode |
| // LexerCharSet. |
| tokens { TOKEN_REF , RULE_REF , LEXER_CHAR_SET } |
| // Extra channels: WS is routed to OFF_CHANNEL, comment tokens to COMMENT. |
| channels { OFF_CHANNEL , COMMENT } |
| |
| // ------------------------- |
| // Comments |
| // Documentation comment (DocComment fragment), emitted on the COMMENT channel. |
| DOC_COMMENT : DocComment -> channel (COMMENT) ; |
| |
| // Block comment (BlockComment fragment), emitted on the COMMENT channel. |
| BLOCK_COMMENT : BlockComment -> channel (COMMENT) ; |
| |
| // Line comment (LineComment fragment), emitted on the COMMENT channel. |
| LINE_COMMENT : LineComment -> channel (COMMENT) ; |
| |
| // ------------------------- |
| // Integer |
| |
| // Integer token; the accepted spelling is whatever the DecimalNumeral |
| // fragment (defined outside this file) matches. |
| INT : DecimalNumeral ; |
| |
| // ------------------------- |
| // Literal string |
| // |
| // ANTLR makes no distinction between a single character literal and a |
| // multi-character string. All literals are single quote delimited and |
| // may contain Unicode escape sequences of the form \uxxxx, where each x |
| // is a hexadecimal digit (per the Unicode standard). |
| // A complete literal, as matched by the SQuoteLiteral fragment. |
| STRING_LITERAL : SQuoteLiteral ; |
| |
| // Matched via the USQuoteLiteral fragment -- presumably a single-quoted |
| // literal whose closing quote is missing (confirm in LexBasic). |
| UNTERMINATED_STRING_LITERAL : USQuoteLiteral ; |
| |
| // ------------------------- |
| // Arguments |
| // |
| // Certain argument lists, such as those specifying call parameters |
| // to a rule invocation, or input parameters to a rule specification |
| // are contained within square brackets. |
| // Opening square bracket. No lexer command is attached: the embedded action |
| // calls handleBeginArgument(), which is defined outside this grammar |
| // (presumably on the LexerAdaptor superclass -- confirm). |
| BEGIN_ARGUMENT : LBrack { this.handleBeginArgument(); } ; |
| |
| // ------------------------- |
| // Target Language Actions |
| // Opening brace: emits BEGIN_ACTION and pushes mode TargetLanguageAction, |
| // whose rules consume the action body. |
| BEGIN_ACTION : LBrace -> pushMode (TargetLanguageAction) ; |
| |
| // ------------------------- |
| // Keywords |
| // |
| // 'options', 'tokens', and 'channels' are considered keywords |
| // but only when followed by '{', and considered as a single token. |
| // Otherwise, the symbols are tokenized as RULE_REF and allowed as |
| // an identifier in a labeledElement. |
| // Each keyword token spans the keyword, any run of WSNLCHARS, and the '{'. |
| OPTIONS |
| : 'options' WSNLCHARS* '{' |
| ; |
| |
| TOKENS |
| : 'tokens' WSNLCHARS* '{' |
| ; |
| |
| CHANNELS |
| : 'channels' WSNLCHARS* '{' |
| ; |
| |
| // One whitespace or newline character: space, tab, form feed, LF or CR. |
| fragment WSNLCHARS |
| : [ \t\f\n\r] |
| ; |
| |
| // Plain keywords: each rule matches exactly the quoted literal. |
| IMPORT    : 'import'    ; |
| FRAGMENT  : 'fragment'  ; |
| LEXER     : 'lexer'     ; |
| PARSER    : 'parser'    ; |
| GRAMMAR   : 'grammar'   ; |
| PROTECTED : 'protected' ; |
| PUBLIC    : 'public'    ; |
| PRIVATE   : 'private'   ; |
| RETURNS   : 'returns'   ; |
| LOCALS    : 'locals'    ; |
| THROWS    : 'throws'    ; |
| CATCH     : 'catch'     ; |
| FINALLY   : 'finally'   ; |
| MODE      : 'mode'      ; |
| // ------------------------- |
| // Punctuation |
| |
| // Each punctuation token delegates to the like-named fragment, which is |
| // defined outside this file. |
| COLON       : Colon      ; |
| COLONCOLON  : DColon     ; |
| COMMA       : Comma      ; |
| SEMI        : Semi       ; |
| LPAREN      : LParen     ; |
| RPAREN      : RParen     ; |
| // NOTE(review): BEGIN_ACTION, declared earlier, matches the same LBrace |
| // fragment. With equal-length matches the earlier rule wins, so LBRACE looks |
| // unreachable in default mode -- confirm before relying on it. |
| LBRACE      : LBrace     ; |
| RBRACE      : RBrace     ; |
| RARROW      : RArrow     ; |
| LT          : Lt         ; |
| GT          : Gt         ; |
| ASSIGN      : Equal      ; |
| QUESTION    : Question   ; |
| STAR        : Star       ; |
| PLUS_ASSIGN : PlusAssign ; |
| PLUS        : Plus       ; |
| OR          : Pipe       ; |
| DOLLAR      : Dollar     ; |
| RANGE       : Range      ; |
| DOT         : Dot        ; |
| AT          : At         ; |
| POUND       : Pound      ; |
| NOT         : Tilde      ; |
| // ------------------------- |
| // Identifiers - allows unicode rule/token names |
| |
| // Identifier token; its shape is given by the Id fragment. |
| ID : Id ; |
| // ------------------------- |
| // Whitespace |
| |
| // One or more Ws matches form a single token, sent to OFF_CHANNEL. |
| WS : Ws+ -> channel (OFF_CHANNEL) ; |
| |
| // ------------------------- |
| // Illegal Characters |
| // |
| // This is an illegal character trap which is always the last rule in the |
| // lexer specification. It matches a single character of any value and being |
| // the last rule in the file will match when no other rule knows what to do |
| // about the character. It is reported as an error but is not passed on to the |
| // parser. This means that the parser can deal with the grammar file anyway |
| // but we will not try to analyse or code generate from a file with lexical |
| // errors. |
| |
| // Comment this rule out to allow the error to be propagated to the parser |
| // Catch-all: any single character, routed to the HIDDEN channel. |
| ERRCHAR : . -> channel (HIDDEN) ; |
| |
| // ====================================================== |
| // Lexer modes |
| // ------------------------- |
| // Arguments |
| mode Argument; |
| // Body of a bracketed argument list, e.g. [int x, List<String> a[]]. |
| // Rule order matters here: the catch-all ARGUMENT_CONTENT must stay last. |
| |
| // A nested '[' is retyped as content and re-enters this mode. |
| NESTED_ARGUMENT : LBrack -> type (ARGUMENT_CONTENT) , pushMode (Argument) ; |
| |
| // Escapes and quoted literals are consumed as whole units and retyped as |
| // content (presumably so brackets inside them are not seen as delimiters). |
| ARGUMENT_ESCAPE : EscAny -> type (ARGUMENT_CONTENT) ; |
| ARGUMENT_STRING_LITERAL : DQuoteLiteral -> type (ARGUMENT_CONTENT) ; |
| ARGUMENT_CHAR_LITERAL : SQuoteLiteral -> type (ARGUMENT_CONTENT) ; |
| |
| // Closing bracket: no lexer command; the embedded action calls |
| // handleEndArgument(), which is defined outside this grammar. |
| END_ARGUMENT : RBrack { this.handleEndArgument(); } ; |
| |
| // Added so that a non-EOF token type is returned when the input ends inside |
| // an argument; plain EOF misbehaved here. Pops this mode. |
| UNTERMINATED_ARGUMENT : EOF -> popMode ; |
| |
| // Fallback: any other single character. |
| ARGUMENT_CONTENT : . ; |
| |
| // TODO: This grammar and the one used in the Intellij Antlr4 plugin differ |
| // for "actions". This needs to be resolved at some point. |
| // The Intellij Antlr4 grammar is here: |
| // https://github.com/antlr/intellij-plugin-v4/blob/1f36fde17f7fa63cb18d7eeb9cb213815ac658fb/src/main/antlr/org/antlr/intellij/plugin/parser/ANTLRv4Lexer.g4#L587 |
| |
| // ------------------------- |
| // Target Language Actions |
| // |
| // Many language targets use {} as block delimiters and so we |
| // must recursively match {} delimited blocks to balance the |
| // braces. Additionally, we must make some assumptions about |
| // literal string representation in the target language. We assume |
| // that they are delimited by ' or " and so consume these |
| // in their own alts so as not to inadvertently match {}. |
| mode TargetLanguageAction; |
| // Body of a {...} action. Rule order matters here: the catch-all |
| // ACTION_CONTENT must stay last. |
| |
| // A nested '{' is retyped as content and re-enters this mode. |
| NESTED_ACTION : LBrace -> type (ACTION_CONTENT) , pushMode (TargetLanguageAction) ; |
| |
| // Escapes, quoted literals and comments are consumed as whole units and |
| // retyped as content. |
| ACTION_ESCAPE : EscAny -> type (ACTION_CONTENT) ; |
| ACTION_STRING_LITERAL : DQuoteLiteral -> type (ACTION_CONTENT) ; |
| ACTION_CHAR_LITERAL : SQuoteLiteral -> type (ACTION_CONTENT) ; |
| ACTION_DOC_COMMENT : DocComment -> type (ACTION_CONTENT) ; |
| ACTION_BLOCK_COMMENT : BlockComment -> type (ACTION_CONTENT) ; |
| ACTION_LINE_COMMENT : LineComment -> type (ACTION_CONTENT) ; |
| |
| // Closing brace: no lexer command; the embedded action calls |
| // handleEndAction(), which is defined outside this grammar. |
| END_ACTION : RBrace { this.handleEndAction(); } ; |
| |
| // Input ended inside an action: emit a dedicated token and pop this mode. |
| UNTERMINATED_ACTION : EOF -> popMode ; |
| |
| // Fallback: any other single character. |
| ACTION_CONTENT : . ; |
| |
| // ------------------------- |
| mode LexerCharSet; |
| // NOTE(review): no rule visible in this file pushes this mode; presumably |
| // the LexerAdaptor superclass switches to it -- confirm. |
| |
| // Accumulates set contents (anything but ']' or '\', or an escape) with |
| // 'more', so the text is carried into the token that ends the set. |
| LEXER_CHAR_SET_BODY : (~ [\]\\] | EscAny)+ -> more ; |
| |
| // Closing bracket: emits LEXER_CHAR_SET and pops this mode. |
| LEXER_CHAR_SET : RBrack -> popMode ; |
| |
| // Input ended inside a character set: emit a dedicated token and pop. |
| UNTERMINATED_CHAR_SET : EOF -> popMode ; |
| |
| // ------------------------------------------------------------------------------ |
| // Grammar specific Keywords, Punctuation, etc. |
| // One NameStartChar followed by zero or more NameChar; both fragments are |
| // defined outside this file. |
| fragment Id : NameStartChar NameChar* ; |
| |