// jena-arq/Grammar/sse/tokens.inc - jena - Git at Google

 /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 // Basic tokens for SPARQL / RDF terms.
 // SSE - SPARQL S-Expressions
 // Not the keywords.


 // Whitespace token: exactly one space, tab, newline, carriage return
 // or form feed character.
 TOKEN :
 {
   < WS: [" ", "\t", "\n", "\r", "\f"] >
 }

 // Conditional section: the SKIP production below is only part of the
 // grammar when the preprocessor symbol SKIP is defined.
 // NOTE(review): <WS> is also declared as a TOKEN in this file; confirm
 // which production takes effect when both are active, and that defining
 // the SKIP symbol does not also rewrite the SKIP keyword on the next line.
 #ifdef SKIP
 SKIP : { <WS> }    //" " | "\t" | "\n" | "\r" | "\f" }
 #endif

 // Line comments, declared as special tokens.
 // Each one runs from its marker character up to the end of the line and
 // absorbs the line terminator ("\n", "\r" or "\r\n") when one is present.
 SPECIAL_TOKEN :
 {
   // Comment introduced by "#".
   < SINGLE_LINE_COMMENT1: "#" (~["\r", "\n"])* ("\r\n" | "\n" | "\r")? >

   // Comment introduced by ";" - Lisp-style (makes Emacs lisp-mode more useful).
 | < SINGLE_LINE_COMMENT2: ";" (~["\r", "\n"])* ("\r\n" | "\n" | "\r")? >
 }

 // Tokens for IRIs, prefixed names, blank node labels and variables.
 // The order of the alternatives matters: when two tokens match the same
 // longest input, JavaCC picks the one declared first.
 TOKEN:
 {
    // "<" ... ">" whose content has no "<", no ">" and no character in
    // the range U+0000 to U+0020 (so no spaces or control characters).
    <IRIref: "<" (~[">","<","\u0000"-"\u0020"])* ">" >
    // Prefix and local part are both optional, so a lone ":" matches.
 |  <PNAME: (<PN_PREFIX>)? ":" (<PN_LOCAL>)? >
 |  <BLANK_NODE_LABEL: "_:" (<PN_LOCAL>)? >  // Allows no label

     // Co-ordinate with ARQConstants
     // Named variable - allows no name
 |  <VAR_NAMED:   "?" (<VARNAME>)?>

 // The two definitions below are disabled (commented out).
 //     // Non-distinguished variable (BNode in SPARQL)
 // |  <VAR_NAMED2:   "?." (~[" " , "\t" , "\n" , "\r" , "\f",
 //                             "(", ")", "[", "]", "{", "}"])* >
 //
 // |  <VAR_ANON:     "??" (~[ " " , "\t" , "\n" , "\r" , "\f",
 //                            "(", ")", "[", "]", "{", "}"])* >

 // "?" followed by one or more <SYM> characters. Declared after VAR_NAMED,
 // so it only wins when it matches a longer run of input than VAR_NAMED.
 | <VAR_OTHER: "?" (<SYM>)+ >
 }

 // Numeric and string literal tokens.
 // A name prefixed with "#" is a private fragment: it can be referenced
 // from other token definitions but is never produced as a token itself.
 TOKEN :
 {
   < #DIGITS: (["0"-"9"])+>
 | < INTEGER: (["+","-"])? <DIGITS> >
 | < DECIMAL: (["+","-"])? ( <DIGITS> "." (<DIGITS>)* | "." <DIGITS> ) >
 | < DOUBLE:   // Required exponent.
       // The optional sign is outside the choice so that it applies to all
       // three forms (1.5e3, .5e3 and 1e3), in line with INTEGER and DECIMAL.
       (["+","-"])?
       ( (["0"-"9"])+ "." (["0"-"9"])* <EXPONENT>
       | "." (["0"-"9"])+ <EXPONENT>
       | (["0"-"9"])+ <EXPONENT>
       )
       >
 | < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >
 | < #QUOTE_3D: "\"\"\"">
 | < #QUOTE_3S: "'''">
   // Backslash escape. Declared without "#", so it is a token in its own
   // right as well as a building block for the string literals below.
 | < ECHAR: "\\" ( "t"|"b"|"n"|"r"|"f"|"\\"|"\""|"'") >
 | < STRING_LITERAL1:
       // Single quoted string: no raw quote, backslash, newline or
       // carriage return; escapes only via ECHAR.
       "'" ( (~["'","\\","\n","\r"]) | <ECHAR> )* "'" >
 | < STRING_LITERAL2:
       // Double quoted string: same rules with the double quote.
       "\"" ( (~["\"","\\","\n","\r"]) | <ECHAR> )* "\"" >
 | < STRING_LITERAL_LONG1:
       // Triple single-quoted string: line breaks are allowed, and one or
       // two quotes may appear as long as a non-quote character follows.
      <QUOTE_3S>
       ( ("'" | "''")? (~["'","\\"] | <ECHAR> ))*
      <QUOTE_3S> >

 | < STRING_LITERAL_LONG2:
       // Triple double-quoted string: same rules with the double quote.
      <QUOTE_3D>
       ( ("\"" | "\"\"")? (~["\"","\\"] | <ECHAR> ))*
      <QUOTE_3D> >
 }

 // Structural punctuation: parentheses, braces, brackets and the
 // doubled angle brackets.
 TOKEN :
 {
   < LPAREN   : "("  >
 | < RPAREN   : ")"  >
 | < LBRACE   : "{"  >
 | < RBRACE   : "}"  >
 | < LBRACKET : "["  >
 | < RBRACKET : "]"  >
 | < LT2      : "<<" >
 | < GT2      : ">>" >
 }

 // Specials for literals trailing parts
 // Otherwise include in Symbol() rule for when out of position.
 TOKEN :
 {
   // The two-character "^^" marker.
   < DATATYPE: "^^" >
   // "@", then one or more letters, then any number of "-" separated
   // groups of letters and digits. After a match the token manager
   // switches to (or stays in) the DEFAULT lexical state.
 |  <LANGTAG: <AT> (<A2Z>)+("-" (<A2ZN>)+)* > : DEFAULT
   // Private fragments ("#"): only usable inside other token definitions.
 | < #AT: "@">
 |  <#A2Z: ["a"-"z","A"-"Z"]>
 |  <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]>
 }

 // Character classes and name shapes used by PNAME, BLANK_NODE_LABEL and
 // VAR_NAMED. Every definition here is private ("#"), so none of them is
 // produced as a token on its own.
 TOKEN:
 {
   // XML 1.1 NCNameStartChar without "_"
   <#PN_CHARS_BASE:
           ["A"-"Z"] | ["a"-"z"] |
           ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] |
           ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] |
           ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] |
           ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFD"]
           >
           // The supplementary range below appears only in this comment;
           // it is not part of the character list above.
           // [#x10000-#xEFFFF]
 |
   // Base characters plus underscore.
   <#PN_CHARS_U: <PN_CHARS_BASE> | "_" >
 |
 // No DOT
   <#PN_CHARS: (<PN_CHARS_U> | "-" | ["0"-"9"] | "\u00B7" |
               ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] ) >
 |
   // No leading "_", no trailing ".", can have dot inside prefix name.
   <#PN_PREFIX: <PN_CHARS_BASE> ((<PN_CHARS>|".")* <PN_CHARS>)?  >
 |
   // With a leading "_", no dot at end of local name.
   // The first character may also be a digit.
   <#PN_LOCAL: (<PN_CHARS_U> | ["0"-"9"]) ((<PN_CHARS>|".")* <PN_CHARS>)?  >
 |
   // NCNAME without "-" and ".", allowing leading digits.
   <#VARNAME: ( <PN_CHARS_U> | ["0"-"9"] )
              ( <PN_CHARS_U> | ["0"-"9"] | "\u00B7" |
                ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] )* >


 }

 // Generic symbols: SYMBOL is the only token produced by this block;
 // SYM, SYM1 and SYM_ESC are private fragments.
 TOKEN:
 {
   // Anything left that isn't structural
   // Excludes:
   //   LPAREN and RPAREN / LBRACKET/RBRACKET / LBRACE/RBRACE
   //   Quotes, Whitespace
   // The character list also excludes "<" and ">".
   <#SYM:  (~["<", ">", "(", ")", "[", "]", "{", "}", "'", "\"", " ", "\t","\n","\r","\f" ])>
 |
   // First character of a symbol: unlike SYM it rejects "^" and "@"
   // but accepts "<" and ">".
   <#SYM1: (~["^", "@",
              "(", ")", "[", "]", "{", "}", "'", "\"", " ", "\t","\n","\r","\f" ])>
   // Backslash followed by a space or a quote character.
   // NOTE(review): not referenced by any token visible in this file.
 | <#SYM_ESC: "\\" ( " " | "'" | "\"" ) >
 |
   // A lone "<" or ">", or one SYM1 character followed by any number of
   // SYM characters.
   <SYMBOL: ( "<" | ">"
            | (<SYM1> (<SYM>)*)
            ) >
 }

 // Catch-all tokens.  Must be last.
 // Any non-whitespace.  Causes a parser exception, rather than a
 // token manager error (with hidden line numbers).
 // Only bad IRIs (e.g. spaces) now give unhelpful parse errors.
 TOKEN:
 {
   // One or more non-whitespace characters.
   // NOTE(review): the "#" makes this a private definition, so it is never
   // produced as a token and cannot act as a catch-all as written. Removing
   // the "#" would let this pattern out-match shorter tokens such as "(",
   // so confirm the intent before changing it.
   <#UNKNOWN: (~[" ","\t","\n","\r","\f" ])+ >
 }

 /*
 # Local Variables:
 # tab-width: 4
 # indent-tabs-mode: nil
 # comment-default-style: "//"
 # End:
 */
	/**
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	// Basic tokens for SPARQL / RDF terms.
	// SSE - SPARQL S-Expressions
	// Not the keywords.


	// Whitespace token: exactly one space, tab, newline, carriage return
	// or form feed character.
	TOKEN: { <WS: " " | "\t" | "\n" | "\r" | "\f"> }

	// Conditional section: the SKIP production below is only part of the
	// grammar when the preprocessor symbol SKIP is defined.
	#ifdef SKIP
	SKIP : { <WS> } //" " | "\t" | "\n" | "\r" | "\f" }
	#endif

	// Line comments, declared as special tokens.
	// Each one runs from its marker character up to the end of the line and
	// absorbs the line terminator ("\n", "\r" or "\r\n") when one is present.
	SPECIAL_TOKEN :
	{
	    // Comment introduced by "#".
	    <SINGLE_LINE_COMMENT1: "#" (~["\n","\r"])* ("\n"|"\r"|"\r\n")? >
	|
	    // Lisp-style comments (makes Emacs lisp-mode more useful).
	    <SINGLE_LINE_COMMENT2: ";" (~["\n","\r"])* ("\n"|"\r"|"\r\n")? >
	}

	// Tokens for IRIs, prefixed names, blank node labels and variables.
	// The order of the alternatives matters: when two tokens match the same
	// longest input, JavaCC picks the one declared first.
	TOKEN:
	{
	    // "<" ... ">" whose content has no "<", no ">" and no character in
	    // the range U+0000 to U+0020.
	    <IRIref: "<" (~[">","<","\u0000"-"\u0020"])* ">" >
	    // Prefix and local part are both optional, so a lone ":" matches.
	|   <PNAME: (<PN_PREFIX>)? ":" (<PN_LOCAL>)? >
	|   <BLANK_NODE_LABEL: "_:" (<PN_LOCAL>)? > // Allows no label

	    // Co-ordinate with ARQConstants
	    // Named variable - allows no name
	|   <VAR_NAMED: "?" (<VARNAME>)?>

	// The two definitions below are disabled (commented out).
	// // Non-distinguished variable (BNode in SPARQL)
	// | <VAR_NAMED2: "?." (~[" " , "\t" , "\n" , "\r" , "\f",
	// "(", ")", "[", "]", "{", "}"])* >
	//
	// | <VAR_ANON: "??" (~[ " " , "\t" , "\n" , "\r" , "\f",
	// "(", ")", "[", "]", "{", "}"])* >

	    // "?" followed by one or more <SYM> characters.
	|   <VAR_OTHER: "?" (<SYM>)+ >
	}

	// Numeric and string literal tokens.
	// A name prefixed with "#" is a private fragment: it can be referenced
	// from other token definitions but is never produced as a token itself.
	TOKEN :
	{
	  < #DIGITS: (["0"-"9"])+>
	| < INTEGER: (["+","-"])? <DIGITS> >
	| < DECIMAL: (["+","-"])? ( <DIGITS> "." (<DIGITS>)* | "." <DIGITS> ) >
	| < DOUBLE: // Required exponent.
	      // The optional sign is outside the choice so that it applies to all
	      // three forms (1.5e3, .5e3 and 1e3), in line with INTEGER and DECIMAL.
	      (["+","-"])?
	      ( (["0"-"9"])+ "." (["0"-"9"])* <EXPONENT>
	      | "." (["0"-"9"])+ <EXPONENT>
	      | (["0"-"9"])+ <EXPONENT>
	      )
	      >
	| < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >
	| < #QUOTE_3D: "\"\"\"">
	| < #QUOTE_3S: "'''">
	  // Backslash escape. Declared without "#", so it is a token in its own
	  // right as well as a building block for the string literals below.
	| < ECHAR: "\\" ( "t"|"b"|"n"|"r"|"f"|"\\"|"\""|"'") >
	| < STRING_LITERAL1:
	      // Single quoted string
	      "'" ( (~["'","\\","\n","\r"]) | <ECHAR> )* "'" >
	| < STRING_LITERAL2:
	      // Double quoted string
	      "\"" ( (~["\"","\\","\n","\r"]) | <ECHAR> )* "\"" >
	| < STRING_LITERAL_LONG1:
	      // Triple single-quoted string: line breaks are allowed, and one or
	      // two quotes may appear as long as a non-quote character follows.
	      <QUOTE_3S>
	      ( ("'" | "''")? (~["'","\\"] | <ECHAR> ))*
	      <QUOTE_3S> >

	| < STRING_LITERAL_LONG2:
	      // Triple double-quoted string: same rules with the double quote.
	      <QUOTE_3D>
	      ( ("\"" | "\"\"")? (~["\"","\\"] | <ECHAR> ))*
	      <QUOTE_3D> >
	}

	// Structural punctuation: parentheses, braces, brackets and the
	// doubled angle brackets.
	TOKEN :
	{
	  < LPAREN: "(" >
	| < RPAREN: ")" >

	| < LBRACE: "{" >
	| < RBRACE: "}" >

	| < LBRACKET: "[" >
	| < RBRACKET: "]" >

	| < LT2 : "<<" >
	| < GT2 : ">>" >
	}

	// Specials for literals trailing parts
	// Otherwise include in Symbol() rule for when out of position.
	TOKEN :
	{
	  // The two-character "^^" marker.
	  < DATATYPE: "^^" >
	  // "@", then one or more letters, then any number of "-" separated
	  // groups of letters and digits. After a match the token manager
	  // switches to (or stays in) the DEFAULT lexical state.
	| <LANGTAG: <AT> (<A2Z>)+("-" (<A2ZN>)+)* > : DEFAULT
	  // Private fragments ("#"): only usable inside other token definitions.
	| < #AT: "@">
	| <#A2Z: ["a"-"z","A"-"Z"]>
	| <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]>
	}

	// Character classes and name shapes used by PNAME, BLANK_NODE_LABEL and
	// VAR_NAMED. Every definition here is private ("#"), so none of them is
	// produced as a token on its own.
	TOKEN:
	{
	  // XML 1.1 NCNameStartChar without "_"
	  <#PN_CHARS_BASE:
	          ["A"-"Z"] | ["a"-"z"] |
	          ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] |
	          ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] |
	          ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] |
	          ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFD"]
	          >
	          // The supplementary range below appears only in this comment;
	          // it is not part of the character list above.
	          // [#x10000-#xEFFFF]
	|
	  // Base characters plus underscore.
	  <#PN_CHARS_U: <PN_CHARS_BASE> | "_" >
	|
	  // No DOT
	  <#PN_CHARS: (<PN_CHARS_U> | "-" | ["0"-"9"] | "\u00B7" |
	              ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] ) >
	|
	  // No leading "_", no trailing ".", can have dot inside prefix name.
	  <#PN_PREFIX: <PN_CHARS_BASE> ((<PN_CHARS>|".")* <PN_CHARS>)? >
	|
	  // With a leading "_", no dot at end of local name.
	  // The first character may also be a digit.
	  <#PN_LOCAL: (<PN_CHARS_U> | ["0"-"9"]) ((<PN_CHARS>|".")* <PN_CHARS>)? >
	|
	  // NCNAME without "-" and ".", allowing leading digits.
	  <#VARNAME: ( <PN_CHARS_U> | ["0"-"9"] )
	             ( <PN_CHARS_U> | ["0"-"9"] | "\u00B7" |
	               ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] )* >


	}

	// Generic symbols: SYMBOL is the only token produced by this block;
	// SYM, SYM1 and SYM_ESC are private fragments.
	TOKEN:
	{
	  // Anything left that isn't structural
	  // Excludes:
	  //   LPAREN and RPAREN / LBRACKET/RBRACKET / LBRACE/RBRACE
	  //   Quotes, Whitespace
	  // The character list also excludes "<" and ">".
	  <#SYM: (~["<", ">", "(", ")", "[", "]", "{", "}", "'", "\"", " ", "\t","\n","\r","\f" ])>
	|
	  // First character of a symbol: unlike SYM it rejects "^" and "@"
	  // but accepts "<" and ">".
	  <#SYM1: (~["^", "@",
	             "(", ")", "[", "]", "{", "}", "'", "\"", " ", "\t","\n","\r","\f" ])>
	  // Backslash followed by a space or a quote character.
	  // NOTE(review): not referenced by any token visible in this file.
	| <#SYM_ESC: "\\" ( " " | "'" | "\"" ) >
	|
	  // A lone "<" or ">", or one SYM1 character followed by any number of
	  // SYM characters.
	  <SYMBOL: ( "<" | ">"
	           | (<SYM1> (<SYM>)*)
	           ) >
	}

	// Catch-all tokens. Must be last.
	// Any non-whitespace. Causes a parser exception, rather than a
	// token manager error (with hidden line numbers).
	// Only bad IRIs (e.g. spaces) now give unhelpful parse errors.
	TOKEN:
	{
	// One or more non-whitespace characters.
	// NOTE(review): the "#" makes this a private definition, so it is never
	// produced as a token and cannot act as a catch-all as written. Removing
	// the "#" would let this pattern out-match shorter tokens such as "(",
	// so confirm the intent before changing it.
	<#UNKNOWN: (~[" ","\t","\n","\r","\f" ])+ >
	}

	/*
	# Local Variables:
	# tab-width: 4
	# indent-tabs-mode: nil
	# comment-default-style: "//"
	# End:
	*/