// jena-core/Grammar/turtle.jj (Apache Jena)

 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 // Turtle & N3 for Jena

 // N3 compatible:
 // All legal Turtle documents are accepted by this grammar.
 // Some N3 features, which are still RDF, are provided.

 // JavaCC generator options for this grammar.
 options
 {
   // Use \ u escapes in streams AND use a reader for the query
   // => get both raw and escaped unicode

    // Unicode escapes are decoded by the generated character stream, before
    // the token manager sees the input.
    JAVA_UNICODE_ESCAPE   = true ;
    UNICODE_INPUT         = false ;

   // Generate an instance-based parser rather than one with static members.
   STATIC                = false ;
   // Uncomment to trace the generated parser / token manager.
 //   DEBUG_PARSER          = true ;
 //   DEBUG_TOKEN_MANAGER   = true ;
 }

 PARSER_BEGIN(TurtleParser)
 /*
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package com.hp.hpl.jena.n3.turtle.parser ;

 import com.hp.hpl.jena.n3.turtle.ParserBase ;
 import com.hp.hpl.jena.graph.* ;

 /**
  * Parser class that JavaCC fills in from the productions of this grammar.
  * The body is intentionally empty here; the helpers called from the grammar
  * actions are not defined in this file (presumably inherited from
  * ParserBase -- confirm there).
  */
 public class TurtleParser extends ParserBase
 {
 }
 PARSER_END(TurtleParser)

 // --- Entry point

 // Start symbol: an optional byte order mark, then any number of
 // statements, up to end of input.
 void parse() : {}
 {
   [ <BOM> ]
   ( Statement() )*
   <EOF>
 }

 // One statement: a directive or a group of triples sharing a subject,
 // closed by a mandatory ".".
 void Statement() : {}
 {
   (
     Directive()
   |
     TriplesSameSubject()
   )
   <DOT>
 }

 // "@prefix" or "@base" declaration. The closing "." is consumed by Statement.
 void Directive() : { Token tok ; String target ; }
 {
     // Note that @prefix looks like a LANGTAG
     <PREFIX> tok = <PNAME_NS> target = IRI_REF()
     {
       // Line/column passed on are those of the prefix-name token.
       String prefix = fixupPrefix(tok.image, tok.beginLine, tok.beginColumn) ;
       setPrefix(tok.beginLine, tok.beginColumn, prefix, target) ;
     }
  |
     tok = <BASE> target = IRI_REF()
     {
       // Line/column passed on are those of the "@base" keyword.
       setBase(target, tok.beginLine, tok.beginColumn) ;
     }
 }

 // -------- PATH
 // Operator tokens for paths. None of the productions visible in this
 // grammar consumes them, but they still take part in tokenization.
 TOKEN:
 {
    < PLING:    "!" >
  | < VBAR:     "|" >
  | < CARROT:   "^" >
  | < FPATH:    "->" >
  | < RPATH:    "<-" >
 }

 // ---- TRIPLES

 // A subject followed by its predicate/object pairs.
 void TriplesSameSubject() : { Node subj ; }
 {
   // Plain subject: at least one predicate/object pair must follow.
   // (emitTriple checks it's a valid triple.)
   subj = VarOrTerm()
   PropertyListNotEmpty(subj)
 |
   // Subject that itself generates triples (collection or "[ ... ]"):
   // the property list may be empty.
   subj = TriplesNode()
   PropertyList(subj)
 }


 // Optional property list for subject s.
 void PropertyList(Node s) : { }
 {
   [ PropertyListNotEmpty(s) ]
 }

 // Non-recursive for Turtle long PropertyList tests
 // Shape: verb objects ( ";" [ verb objects ] )*
 // A ";" with nothing after it is accepted.
 void PropertyListNotEmpty(Node s) : { Node pred ; }
 {
   pred = Verb()
   ObjectList(s, pred)
   (
     <SEMICOLON>
     [ pred = Verb() ObjectList(s, pred) ]
   )*
 }

 // Non-recursive for Turtle long PropertyList tests
 // Comma-separated objects for one subject/predicate pair. Each Object()
 // call emits its own triple, so no local state is needed here.
 void ObjectList(Node s, Node p): { }
 {
   Object(s, p)
   ( <COMMA> Object(s, p) )*
 }

 // A single object: parse the node, then emit the triple (s, p, object).
 void Object(Node s, Node p): { Node obj ; }
 {
   obj = GraphNode()
   {
     // "token" is the last token consumed, i.e. the final token of the object.
     Triple triple = new Triple(s, p, obj) ;
     emitTriple(token.beginLine, token.beginColumn, triple) ;
   }
 }

 // Predicate position: an IRI, the keyword "a", or the N3 shorthands
 // "=" and "=>", which raise a parse exception when strictTurtle is set.
 Node Verb() : { Node pred ; String iriStr ; }
 {
    (
      iriStr = IRIref()
        { pred = createNode(iriStr) ; }
    |
      <KW_A>
        { pred = nRDFtype ; }
    |
      <EQ>
        {
          if ( strictTurtle )
            throwParseException("= (owl:sameAs) not legal in Turtle",
                                token.beginLine, token.beginColumn ) ;
          pred = nOwlSameAs ;
        }
    |
      <ARROW>
        {
          if ( strictTurtle )
            throwParseException("=> (log:implies) not legal in Turtle",
                                token.beginLine, token.beginColumn ) ;
          pred = nLogImplies ;
        }
    )
    { return pred ; }
 }

 // -------- Triple expansions

 // Anything that can stand in a node slot and which is
 // a number of triples

 // A node written with triple-generating syntax: "( ... )" or "[ ... ]".
 Node TriplesNode() : { Node result ; }
 {
   (
     result = Collection()
   |
     result = BlankNodePropertyList()
   )
   { return result ; }
 }

 // "[ verb objects ... ]": a fresh blank node used as the subject of the
 // enclosed, non-empty property list; that blank node is the result.
 Node BlankNodePropertyList() : { Node bnode ; }
 {
   <LBRACKET>
     // Created only once the "[" has been consumed.
     { bnode = createBNode() ; }
   PropertyListNotEmpty(bnode)
   <RBRACKET>
     { return bnode ; }
 }


 // ------- RDF collections

 // Code not as SPARQL/ARQ because of output ordering.
 // "( item ... )" with at least one item. For every item a blank-node cell
 // is created; the link from the previous cell is emitted BEFORE the item is
 // parsed and the cell's rdf:first triple AFTER it. The list is closed with
 // a link to nRDFnil. Returns the first cell.
 Node Collection() :
     { Node head = nRDFnil ; Node prev = null ; Node item ; }
 {
   <LPAREN>
   (
     {
       Node cur = createBNode() ;
       // First cell becomes the list head.
       if ( head == nRDFnil )
       {
         head = cur ;
       }
       // Chain from the previous cell, if there is one.
       if ( prev != null )
       {
         emitTriple(token.beginLine, token.beginColumn,
                    new Triple(prev, nRDFrest,  cur)) ;
       }
     }
     item = GraphNode()
     {
       emitTriple(token.beginLine, token.beginColumn,
                  new Triple(cur, nRDFfirst,  item)) ;
       prev = cur ;
     }
   ) +
   // Not * here - "()" is handled separately.
   <RPAREN>
    {
      // Terminate the list.
      if ( prev != null )
      {
        emitTriple(token.beginLine, token.beginColumn,
                   new Triple(prev, nRDFrest,  nRDFnil)) ;
      }
      return head ;
    }
 }

 // -------- Nodes in a graph pattern or template

 // Any node: a simple term/variable/formula, or triple-generating syntax.
 Node GraphNode() : { Node node ; }
 {
   ( node = VarOrTerm() | node = TriplesNode() )
   { return node ; }
 }

 // A variable, a graph term, or an N3 formula. Returns whatever the chosen
 // sub-production returned.
 Node VarOrTerm() : { Node term = null ; }
 {
   term = Var()       { return term ; }
 |
   term = GraphTerm() { return term ; }
 |
   term = Formula()   { return term ; }
 }

 // N3 formula "{ triples . triples ... }". startFormula/endFormula are
 // called with the positions of the braces. Always returns null.
 Node Formula() : { Token open ; Token close ; }
 {
     open = <LBRACE>
       { startFormula(open.beginLine, open.beginColumn) ; }
     TriplesSameSubject()
     ( <DOT> [ TriplesSameSubject() ] )*
     close = <RBRACE>
       {
         endFormula(close.beginLine, close.beginColumn) ;
         return null ;
       }
 }

 // A "?name" variable, built from the token text and its position.
 Node Var() : { Token varTok ; }
 {
     varTok = <VAR>
     {
       return createVariable(varTok.image,
                             varTok.beginLine, varTok.beginColumn) ;
     }
 }

 // A term that generates no triples of its own: IRI, literal, blank node
 // or the empty list.
 Node GraphTerm() : { Node term ; String iriStr ; }
 {
   (
     iriStr = IRIref()         { term = createNode(iriStr) ; }
   |
     term = RDFLiteral()
   |
     // Cleaner sign handling in Turtle.
     term = NumericLiteral()
   |
     term = BooleanLiteral()
   |
     term = BlankNode()
   |
     // "()" arrives as the single token NIL rather than <LPAREN> <RPAREN>.
     <NIL>                     { term = nRDFnil ; }
   )
   { return term ; }
 }
 // ---- Basic terms

 // Integer, decimal or double literal, built from the raw token text.
 Node NumericLiteral() : { Token num ; Node lit ; }
 {
   (
     num = <INTEGER> { lit = createLiteralInteger(num.image) ; }
   |
     num = <DECIMAL> { lit = createLiteralDecimal(num.image) ; }
   |
     num = <DOUBLE>  { lit = createLiteralDouble(num.image) ; }
   )
   { return lit ; }
 }

 // String literal with an optional language tag or an optional "^^" datatype.
 // lang and dt stay null when the corresponding part is absent.
 Node RDFLiteral() : { String lex ; String lang = null ; String dt = null ; }
 {
   lex = String()
   // Optional lang tag and datatype.
   (
     lang = Langtag()
   |
     ( <DATATYPE> dt = IRIref() )
   )?
     { return createLiteral(lex, lang, dt) ; }
 }

 // Language tag after a string. Returns the token text with its first
 // character removed via stripChars (presumably the "@" -- confirm in
 // ParserBase).
 String Langtag() : { Token tag ; }
 {
   // Enumerate the directives here because they look like language tags.
   ( tag = <LANGTAG> | tag = AnyDirective() )
   { return stripChars(tag.image, 1) ; }
 }

 // Either directive keyword token, returned as-is.
 Token AnyDirective() : { Token kw ; }
 {
     kw = <PREFIX> { return kw ; }
   |
     kw = <BASE>   { return kw ; }
 }

 // TRUE or FALSE token mapped to the XSD_TRUE / XSD_FALSE constants.
 Node BooleanLiteral() : {}
 {
    <TRUE>
      { return XSD_TRUE ; }
   |
    <FALSE>
      { return XSD_FALSE ; }
 }

 // Any of the four string forms. The quotes are stripped (one per side for
 // short strings, stripQuotes3 for long strings), then escapes are processed
 // by unescapeStr.
 String String() : { Token str ;  String body ; }
 {
   (
     str = <STRING_LITERAL1>      { body = stripQuotes(str.image) ; }
   |
     str = <STRING_LITERAL2>      { body = stripQuotes(str.image) ; }
   |
     str = <STRING_LITERAL_LONG1> { body = stripQuotes3(str.image) ; }
   |
     str = <STRING_LITERAL_LONG2> { body = stripQuotes3(str.image) ; }
   )
   { return unescapeStr(body,  str.beginLine, str.beginColumn) ; }
 }

 // An IRI written either as <...> or as a prefixed name.
 String IRIref() : { String iriStr ; }
 {
   ( iriStr = IRI_REF() | iriStr = PrefixedName() )
   { return iriStr ; }
 }

 // "prefix:local" or bare "prefix:"; both are resolved by resolvePName.
 String PrefixedName() : { Token name ; }
 {
   ( name = <PNAME_LN> | name = <PNAME_NS> )
   { return resolvePName(name.image, name.beginLine, name.beginColumn) ; }
 }

 // Labelled blank node "_:x", or the anonymous form "[]".
 Node BlankNode() :  { Token labelTok = null ; }
 {
   labelTok = <BLANK_NODE_LABEL>
     {
       return createBNode(labelTok.image,
                          labelTok.beginLine, labelTok.beginColumn) ;
     }
 |
   // "[]" arrives as the single token ANON rather than <LBRACKET> <RBRACKET>.
   <ANON>
     { return createBNode() ; }
 }

 // An IRI in angle brackets, passed to resolveQuotedIRI with its position.
 String IRI_REF() : { Token iriTok ; }
 {
   iriTok = <IRIref>
   {
     return resolveQuotedIRI(iriTok.image,
                             iriTok.beginLine, iriTok.beginColumn) ;
   }
 }

 // ------------------------------------------
 // Tokens

 // Comments and whitespace

 // Whitespace between tokens is discarded.
 SKIP : { " " | "\t" | "\n" | "\r" | "\f" }

 // Private (#) whitespace pattern, usable only inside other token definitions.
 TOKEN: { <#WS: " " | "\t" | "\n" | "\r" | "\f"> }

 // "#" comment running to end of line. Being a SPECIAL_TOKEN it is not
 // handed to the grammar productions.
 SPECIAL_TOKEN :
 { <SINGLE_LINE_COMMENT: "#" (~["\n","\r"])* ("\n"|"\r"|"\r\n")? > }

 // -------------------------------------------------
 // Keywords : directives before LANGTAG

 // Declared before LANGTAG so that "@prefix" and "@base" are tokenized as
 // directives when both patterns match the same text.
 TOKEN :
 {
   <KW_A:  "a" >
 // Prologue
 | < PREFIX:      "@prefix" >
 | < BASE:        "@base" >
 }


 // Boolean, numeric and string literal tokens.
 // NOTE(review): IGNORE_CASE applies to every definition in this block, so
 // it also affects the letters in ECHAR and EXPONENT, not just TRUE/FALSE --
 // confirm that is intended.
 TOKEN [IGNORE_CASE] :
 {
   < TRUE:        "true" >
 | < FALSE:       "false" >

 // -------------------------------------------------

 // Optional sign, then digits.
 |   < INTEGER: (["-","+"])? <DIGITS> >
 |
    // Optional sign; digits on at least one side of the ".".
    < DECIMAL: (["-","+"])?
               ((<DIGITS>)+ "." (<DIGITS>)* | "." (<DIGITS>)+)
    >
       // Required exponent.
 |  < DOUBLE:
       (["+","-"])?
       (
         (["0"-"9"])+ "." (["0"-"9"])* <EXPONENT>
         | "." (["0"-"9"])+ (<EXPONENT>)
         | (["0"-"9"])+ <EXPONENT>
       )
       >
 // Private helper patterns (#): usable only inside other definitions.
 | < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >
 | < #QUOTE_3D: "\"\"\"">
 | < #QUOTE_3S: "'''">
 // "u" done by javacc input stream.
 // "U" escapes not supported yet for Java strings
 | <ECHAR: "\\" ("t"|"b"|"n"|"r"|"f"|"\\"|"\""|"'")>

 | < STRING_LITERAL1:
       // Single quoted string
       "'" ( (~["'","\\","\n","\r"]) | <ECHAR> )* "'" >

 | < STRING_LITERAL2:
     // Double quoted string
       "\"" ( (~["\"","\\","\n","\r"]) | <ECHAR> )* "\"" >

 // Long strings: may span lines; one or two quote characters are allowed
 // inside as long as they are followed by a non-quote character.
 | < STRING_LITERAL_LONG1:
      <QUOTE_3S>
       ( ~["'","\\"] | <ECHAR> | ("'" ~["'"]) | ("''" ~["'"]))*
      <QUOTE_3S> >

 | < STRING_LITERAL_LONG2:
      <QUOTE_3D>
       ( ~["\"","\\"] | <ECHAR> | ("\"" ~["\""]) | ("\"\"" ~["\""]))*
      <QUOTE_3D> >
 // DIGITS is not private, so it is a token kind of its own; INTEGER is
 // declared earlier and therefore wins for an unsigned run of digits.
 | < DIGITS: (["0"-"9"])+>
 // | <HEX: ["0"-"9"] | ["A"-"F"] | ["a"-"f"]>
 }

 // IRIs, prefixed names, blank node labels, variables and language tags.
 TOKEN:
 {
    // Includes # for relative URIs
    <IRIref: "<" (~[ ">","<", "\"", "{", "}", "^", "\\", "|", "`",
                       "\u0000"-"\u0020"])* ">" >
 // The prefix part is optional, so a lone ":" is a PNAME_NS.
 |  <PNAME_NS: (<PN_PREFIX>)? ":" >
 |  <PNAME_LN: <PNAME_NS> <PN_LOCAL> >
 |  <BLANK_NODE_LABEL: "_:" <PN_LOCAL> >
 |  <VAR: "?" <VARNAME> >
 // "@" letters, then optional "-" separated alphanumeric subtags.
 |  <LANGTAG: <AT> (<A2Z>)+("-" (<A2ZN>)+)* >
 |  <#A2Z: ["a"-"z","A"-"Z"]>
 |  <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]>
 }


 // Brackets and separators. NIL and ANON recognize an empty "()" / "[]"
 // pair, possibly containing whitespace or comments, as one token.
 TOKEN :
 {
   < LPAREN:    "(" >
 | < RPAREN:    ")" >

 | <NIL:        <LPAREN> (<WS>|<SINGLE_LINE_COMMENT>)* <RPAREN> >

 | < LBRACE:    "{" >
 | < RBRACE:    "}" >

 | < LBRACKET:  "[" >
 | < RBRACKET:  "]" >
 | < ANON:      <LBRACKET> (<WS>|<SINGLE_LINE_COMMENT>)* <RBRACKET> >

 | < SEMICOLON: ";" >
 | < COMMA:     "," >
 | < DOT:       "." >
 }

 // Operator

 // Operator and miscellaneous single-purpose tokens. Of these, the visible
 // productions use EQ, ARROW, BOM and DATATYPE; AT is used inside LANGTAG.
 // NOTE(review): PNAME_NS is declared earlier and also matches a lone ":",
 // so COLON looks unreachable as a token -- confirm.
 TOKEN :
 {
   < EQ:      "=" >
 | <ARROW:    "=>">

 | < DOLLAR:  "$">
 | < QMARK:   "?">

 | < TILDE:   "~" >
 | < COLON:   ":" >

 // | < PLUS:    "+" >
 // | < MINUS:   "-" >
 | < STAR:    "*" >
 | < SLASH:   "/" >
 | < RSLASH:   "\\" >
 | < BOM:     "\uFEFF">

 //| < AMP: "&" >
 //| < REM: "%" >

 | < DATATYPE: "^^">
 | < AT: "@">
 }

 // Private (#) character classes and name patterns used to build PNAME_NS,
 // PNAME_LN, BLANK_NODE_LABEL and VAR. None of them is a token by itself.
 TOKEN:
 {
   <#PN_CHARS_BASE:
           ["A"-"Z"] | ["a"-"z"] |
           ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] |
           ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] |
           ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] |
           ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFD"]
           >
           // [#x10000-#xEFFFF]
           // (the supplementary range above is not included in the pattern)
 |
   <#PN_CHARS_U: <PN_CHARS_BASE> | "_" >
 |
 // No DOT
   <#PN_CHARS: (<PN_CHARS_U> | "-" | ["0"-"9"] | "\u00B7" |
               ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] ) >
 |
   // No leading "_", no trailing ".", can have dot inside prefix name.
   <#PN_PREFIX: <PN_CHARS_BASE> ((<PN_CHARS>|".")* <PN_CHARS>)? >
 |
   // With a leading "_", no dot at end of local name.
   <#PN_LOCAL: (<PN_CHARS_U> | ["0"-"9"]) ((<PN_CHARS>|".")* <PN_CHARS>)? >
 |
   // NCNAME without "-" and ".", allowing leading digits.
   <#VARNAME: ( <PN_CHARS_U> | ["0"-"9"] )
              ( <PN_CHARS_U> | ["0"-"9"] | "\u00B7" |
                ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] )* >
 }

 // Catch-all tokens.  Must be last.
 // Any non-whitespace.  Causes a parser exception, rather than a
 // token manager error (with hidden line numbers).
 // Only bad IRIs (e.g. spaces) now give unhelpful parse errors.
 // NOTE(review): UNKNOWN is declared private (#). A private definition can
 // only be referenced from other token definitions and is never produced as
 // a token itself, so confirm whether the catch-all behaviour described for
 // this block is actually in effect.
 TOKEN:
 {
   <#UNKNOWN: (~[" ","\t","\n","\r","\f" ])+ >
 }

 /*
 # Local Variables:
 # tab-width: 4
 # indent-tabs-mode: nil
 # comment-default-style: "//"
 # End:
 */
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	// Turtle & N3 for Jena

	// N3 compatible:
	// All legal Turtle documents are accepted by this grammar.
	// Some N3 features, which are still RDF, are provided.

	// JavaCC generator options for this grammar.
	options
	{
	// Use \ u escapes in streams AND use a reader for the query
	// => get both raw and escaped unicode

	// Unicode escapes are decoded by the generated character stream, before
	// the token manager sees the input.
	JAVA_UNICODE_ESCAPE = true ;
	UNICODE_INPUT = false ;

	// Generate an instance-based parser rather than one with static members.
	STATIC = false ;
	// Uncomment to trace the generated parser / token manager.
	// DEBUG_PARSER = true ;
	// DEBUG_TOKEN_MANAGER = true ;
	}

	PARSER_BEGIN(TurtleParser)
	/*
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package com.hp.hpl.jena.n3.turtle.parser ;

	import com.hp.hpl.jena.n3.turtle.ParserBase ;
	import com.hp.hpl.jena.graph.* ;

	/**
	 * Parser class that JavaCC fills in from the productions of this grammar.
	 * The body is intentionally empty here; the helpers called from the grammar
	 * actions are not defined in this file (presumably inherited from
	 * ParserBase -- confirm there).
	 */
	public class TurtleParser extends ParserBase
	{
	}
	PARSER_END(TurtleParser)

	// --- Entry point

	// Start symbol: an optional byte order mark, then any number of
	// statements, up to end of input.
	void parse() : {}
	{
	  [ <BOM> ]
	  ( Statement() )*
	  <EOF>
	}

	// One statement: a directive or a group of triples sharing a subject,
	// closed by a mandatory ".".
	void Statement() : {}
	{
	  (Directive() | TriplesSameSubject() )
	  <DOT>
	}

	// "@prefix" or "@base" declaration. The closing "." is consumed by Statement.
	void Directive() : { Token t ; String iri ; }
	{
	    // Note that @prefix looks like a LANGTAG
	    <PREFIX> t = <PNAME_NS> iri = IRI_REF()
	    { // Line/column passed on are those of the prefix-name token.
	      String s = fixupPrefix(t.image, t.beginLine, t.beginColumn) ;
	      setPrefix(t.beginLine, t.beginColumn, s, iri) ; }
	 |
	    t = <BASE> iri = IRI_REF()
	    { setBase(iri, t.beginLine, t.beginColumn) ; }
	}

	// -------- PATH
	// Operator tokens for paths. None of the productions visible in this
	// grammar consumes them, but they still take part in tokenization.
	TOKEN:
	{
	   < PLING:    "!" >
	 | < VBAR:     "|" >
	 | < CARROT:   "^" >
	 | < FPATH:    "->" >
	 | < RPATH:    "<-" >
	}

	// ---- TRIPLES

	// A subject followed by its predicate/object pairs.
	void TriplesSameSubject() : { Node s ; }
	{
	  // Plain subject: at least one predicate/object pair must follow.
	  s = VarOrTerm()       // Emit Triple checks it's a valid triple.
	  PropertyListNotEmpty(s)
	|
	  // Any of the triple generating syntax elements; the property list
	  // may be empty here.
	  s = TriplesNode()
	  PropertyList(s)
	}


	// Optional property list for subject s.
	void PropertyList(Node s) : { }
	{
	  [ PropertyListNotEmpty(s) ]
	}

	// Non-recursive for Turtle long PropertyList tests
	// Shape: verb objects ( ";" [ verb objects ] )*
	// A ";" with nothing after it is accepted.
	void PropertyListNotEmpty(Node s) : { Node pred ; }
	{
	  pred = Verb()
	  ObjectList(s, pred)
	  (
	    <SEMICOLON>
	    [ pred = Verb() ObjectList(s, pred) ]
	  )*
	}

	// Non-recursive for Turtle long PropertyList tests
	// Comma-separated objects for one subject/predicate pair. Each Object()
	// call emits its own triple, so no local state is needed here.
	void ObjectList(Node s, Node p): { }
	{
	  Object(s, p)
	  ( <COMMA> Object(s, p) )*
	}

	// A single object: parse the node, then emit the triple (s, p, object).
	void Object(Node s, Node p): { Node obj ; }
	{
	  obj = GraphNode()
	  {
	    // "token" is the last token consumed, i.e. the final token of the object.
	    Triple triple = new Triple(s, p, obj) ;
	    emitTriple(token.beginLine, token.beginColumn, triple) ;
	  }
	}

	// Predicate position: an IRI, the keyword "a", or the N3 shorthands
	// "=" and "=>", which raise a parse exception when strictTurtle is set.
	Node Verb() : { Node p ; String iri ;}
	{
	   ( iri = IRIref() { p = createNode(iri) ; }
	   | <KW_A> { p = nRDFtype ; }
	   | <EQ>
	      { p = nOwlSameAs ;
	        if ( strictTurtle )
	          throwParseException("= (owl:sameAs) not legal in Turtle",
	                          token.beginLine, token.beginColumn ) ;
	      }
	   | <ARROW>
	      { p = nLogImplies ;
	        if ( strictTurtle )
	          throwParseException("=> (log:implies) not legal in Turtle",
	                          token.beginLine, token.beginColumn ) ;
	      }
	   )
	  { return p ; }
	}

	// -------- Triple expansions

	// Anything that can stand in a node slot and which is
	// a number of triples

	// A node written with triple-generating syntax: "( ... )" or "[ ... ]".
	Node TriplesNode() : { Node n ; }
	{
	  n = Collection() { return n ; }
	 |
	  n = BlankNodePropertyList() { return n ; }
	}

	// "[ verb objects ... ]": a fresh blank node used as the subject of the
	// enclosed, non-empty property list; that blank node is the result.
	Node BlankNodePropertyList() : { Node bnode ; }
	{
	  <LBRACKET>
	    // Created only once the "[" has been consumed.
	    { bnode = createBNode() ; }
	  PropertyListNotEmpty(bnode)
	  <RBRACKET>
	    { return bnode ; }
	}


	// ------- RDF collections

	// Code not as SPARQL/ARQ because of output ordering.
	// "( item ... )" with at least one item. For every item a blank-node cell
	// is created; the link from the previous cell is emitted BEFORE the item is
	// parsed and the cell's rdf:first triple AFTER it. The list is closed with
	// a link to nRDFnil. Returns the first cell.
	Node Collection() :
	    { Node head = nRDFnil ; Node prev = null ; Node item ; }
	{
	  <LPAREN>
	  (
	    {
	      Node cur = createBNode() ;
	      // First cell becomes the list head.
	      if ( head == nRDFnil )
	      {
	        head = cur ;
	      }
	      // Chain from the previous cell, if there is one.
	      if ( prev != null )
	      {
	        emitTriple(token.beginLine, token.beginColumn,
	                   new Triple(prev, nRDFrest, cur)) ;
	      }
	    }
	    item = GraphNode()
	    {
	      emitTriple(token.beginLine, token.beginColumn,
	                 new Triple(cur, nRDFfirst, item)) ;
	      prev = cur ;
	    }
	  ) +
	  // Not * here - "()" is handled separately.
	  <RPAREN>
	   {
	     // Terminate the list.
	     if ( prev != null )
	     {
	       emitTriple(token.beginLine, token.beginColumn,
	                  new Triple(prev, nRDFrest, nRDFnil)) ;
	     }
	     return head ;
	   }
	}

	// -------- Nodes in a graph pattern or template

	// Any node: a simple term/variable/formula, or triple-generating syntax.
	Node GraphNode() : { Node n ; }
	{
	  n = VarOrTerm() { return n ; }
	 |
	  n = TriplesNode() { return n ; }
	}

	// A variable, a graph term, or an N3 formula. Returns whatever the chosen
	// sub-production returned.
	Node VarOrTerm() : {Node n = null ; }
	{
	  ( n = Var() | n = GraphTerm() | n = Formula() )
	  { return n ; }
	}

	// N3 formula "{ triples . triples ... }". startFormula/endFormula are
	// called with the positions of the braces. Always returns null.
	Node Formula() : { Token open ; Token close ; }
	{
	    open = <LBRACE>
	      { startFormula(open.beginLine, open.beginColumn) ; }
	    TriplesSameSubject()
	    ( <DOT> [ TriplesSameSubject() ] )*
	    close = <RBRACE>
	      {
	        endFormula(close.beginLine, close.beginColumn) ;
	        return null ;
	      }
	}

	// A "?name" variable, built from the token text and its position.
	Node Var() : { Token varTok ; }
	{
	    varTok = <VAR>
	    {
	      return createVariable(varTok.image,
	                            varTok.beginLine, varTok.beginColumn) ;
	    }
	}

	// A term that generates no triples of its own: IRI, literal, blank node
	// or the empty list.
	Node GraphTerm() : { Node n ; String iri ; }
	{
	  iri = IRIref()    { return createNode(iri) ; }
	|
	  n = RDFLiteral()      { return n ; }
	|
	  // Cleaner sign handling in Turtle.
	  n = NumericLiteral()  { return n ; }
	|
	  n = BooleanLiteral()  { return n ; }
	|
	  n = BlankNode()       { return n ; }
	|
	  // "()" arrives as the single token NIL rather than <LPAREN> <RPAREN>.
	  <NIL>  { return nRDFnil ; }
	}
	// ---- Basic terms

	// Integer, decimal or double literal, built from the raw token text.
	Node NumericLiteral() : { Token t ; }
	{
	  t = <INTEGER> { return createLiteralInteger(t.image) ; }
	| t = <DECIMAL> { return createLiteralDecimal(t.image) ; }
	| t = <DOUBLE> { return createLiteralDouble(t.image) ; }
	}

	// String literal with an optional language tag or an optional "^^" datatype.
	// lang and dt stay null when the corresponding part is absent.
	Node RDFLiteral() : { String lex ; String lang = null ; String dt = null ; }
	{
	  lex = String()
	  // Optional lang tag and datatype.
	  (
	    lang = Langtag()
	  |
	    ( <DATATYPE> dt = IRIref() )
	  )?
	    { return createLiteral(lex, lang, dt) ; }
	}

	// Language tag after a string. Returns the token text with its first
	// character removed via stripChars (presumably the "@" -- confirm in
	// ParserBase).
	String Langtag() : { Token t ; }
	{
	  // Enumerate the directives here because they look like language tags.
	  ( t = <LANGTAG> | t = AnyDirective() )
	  { String lang = stripChars(t.image, 1) ; return lang ; }
	}

	// Either directive keyword token, returned as-is.
	Token AnyDirective() : { Token t ; }
	{
	    ( t = <PREFIX> | t = <BASE> ) { return t ; }
	}

	// TRUE or FALSE token mapped to the XSD_TRUE / XSD_FALSE constants.
	Node BooleanLiteral() : {}
	{
	   <TRUE> { return XSD_TRUE ; }
	  |
	   <FALSE> { return XSD_FALSE ; }
	}

	// Any of the four string forms. The quotes are stripped (one per side for
	// short strings, stripQuotes3 for long strings), then escapes are processed
	// by unescapeStr.
	String String() : { Token t ; String lex ; }
	{
	  ( t = <STRING_LITERAL1> { lex = stripQuotes(t.image) ; }
	  | t = <STRING_LITERAL2> { lex = stripQuotes(t.image) ; }
	  | t = <STRING_LITERAL_LONG1> { lex = stripQuotes3(t.image) ; }
	  | t = <STRING_LITERAL_LONG2> { lex = stripQuotes3(t.image) ; }
	  )
	    { lex = unescapeStr(lex, t.beginLine, t.beginColumn) ;
	      return lex ;
	    }
	}

	// An IRI written either as <...> or as a prefixed name.
	String IRIref() : { String iri ; }
	{
	  iri = IRI_REF() { return iri ; }
	|
	  iri = PrefixedName() { return iri ; }
	}

	// "prefix:local" or bare "prefix:"; both are resolved by resolvePName.
	String PrefixedName() : { Token t ; }
	{
	  ( t = <PNAME_LN>
	    { return resolvePName(t.image, t.beginLine, t.beginColumn) ; }
	  |
	    t = <PNAME_NS>
	    { return resolvePName(t.image, t.beginLine, t.beginColumn) ; }
	  )
	}

	// Labelled blank node "_:x", or the anonymous form "[]".
	Node BlankNode() : { Token t = null ; }
	{
	  t = <BLANK_NODE_LABEL>
	    { return createBNode(t.image, t.beginLine, t.beginColumn) ; }
	|
	  // "[]" arrives as the single token ANON rather than <LBRACKET> <RBRACKET>.
	  <ANON> { return createBNode() ; }

	}

	// An IRI in angle brackets, passed to resolveQuotedIRI with its position.
	String IRI_REF() : { Token iriTok ; }
	{
	  iriTok = <IRIref>
	  {
	    return resolveQuotedIRI(iriTok.image,
	                            iriTok.beginLine, iriTok.beginColumn) ;
	  }
	}

	// ------------------------------------------
	// Tokens

	// Comments and whitespace

	// Whitespace between tokens is discarded.
	SKIP : { " " | "\t" | "\n" | "\r" | "\f" }

	// Private (#) whitespace pattern, usable only inside other token definitions.
	TOKEN: { <#WS: " " | "\t" | "\n" | "\r" | "\f"> }

	// "#" comment running to end of line. Being a SPECIAL_TOKEN it is not
	// handed to the grammar productions.
	SPECIAL_TOKEN :
	{ <SINGLE_LINE_COMMENT: "#" (~["\n","\r"])* ("\n"|"\r"|"\r\n")? > }

	// -------------------------------------------------
	// Keywords : directives before LANGTAG

	// Declared before LANGTAG so that "@prefix" and "@base" are tokenized as
	// directives when both patterns match the same text.
	TOKEN :
	{
	  <KW_A:  "a" >
	// Prologue
	| < PREFIX:      "@prefix" >
	| < BASE:        "@base" >
	}


	// Boolean, numeric and string literal tokens.
	// NOTE(review): IGNORE_CASE applies to every definition in this block, so
	// it also affects the letters in ECHAR and EXPONENT, not just TRUE/FALSE --
	// confirm that is intended.
	TOKEN [IGNORE_CASE] :
	{
	  < TRUE:        "true" >
	| < FALSE:       "false" >

	// -------------------------------------------------

	// Optional sign, then digits.
	|   < INTEGER: (["-","+"])? <DIGITS> >
	|
	   // Optional sign; digits on at least one side of the ".".
	   < DECIMAL: (["-","+"])?
	              ((<DIGITS>)+ "." (<DIGITS>)* | "." (<DIGITS>)+)
	   >
	      // Required exponent.
	|  < DOUBLE:
	      (["+","-"])?
	      (
	        (["0"-"9"])+ "." (["0"-"9"])* <EXPONENT>
	        | "." (["0"-"9"])+ (<EXPONENT>)
	        | (["0"-"9"])+ <EXPONENT>
	      )
	      >
	// Private helper patterns (#): usable only inside other definitions.
	| < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >
	| < #QUOTE_3D: "\"\"\"">
	| < #QUOTE_3S: "'''">
	// "u" done by javacc input stream.
	// "U" escapes not supported yet for Java strings
	| <ECHAR: "\\" ("t"|"b"|"n"|"r"|"f"|"\\"|"\""|"'")>

	| < STRING_LITERAL1:
	      // Single quoted string
	      "'" ( (~["'","\\","\n","\r"]) | <ECHAR> )* "'" >

	| < STRING_LITERAL2:
	    // Double quoted string
	      "\"" ( (~["\"","\\","\n","\r"]) | <ECHAR> )* "\"" >

	// Long strings: may span lines; one or two quote characters are allowed
	// inside as long as they are followed by a non-quote character.
	| < STRING_LITERAL_LONG1:
	     <QUOTE_3S>
	      ( ~["'","\\"] | <ECHAR> | ("'" ~["'"]) | ("''" ~["'"]))*
	     <QUOTE_3S> >

	| < STRING_LITERAL_LONG2:
	     <QUOTE_3D>
	      ( ~["\"","\\"] | <ECHAR> | ("\"" ~["\""]) | ("\"\"" ~["\""]))*
	     <QUOTE_3D> >
	// DIGITS is not private, so it is a token kind of its own; INTEGER is
	// declared earlier and therefore wins for an unsigned run of digits.
	| < DIGITS: (["0"-"9"])+>
	// | <HEX: ["0"-"9"] | ["A"-"F"] | ["a"-"f"]>
	}

	// IRIs, prefixed names, blank node labels, variables and language tags.
	TOKEN:
	{
	   // Includes # for relative URIs
	   <IRIref: "<" (~[ ">","<", "\"", "{", "}", "^", "\\", "|", "`",
	                      "\u0000"-"\u0020"])* ">" >
	// The prefix part is optional, so a lone ":" is a PNAME_NS.
	|  <PNAME_NS: (<PN_PREFIX>)? ":" >
	|  <PNAME_LN: <PNAME_NS> <PN_LOCAL> >
	|  <BLANK_NODE_LABEL: "_:" <PN_LOCAL> >
	|  <VAR: "?" <VARNAME> >
	// "@" letters, then optional "-" separated alphanumeric subtags.
	|  <LANGTAG: <AT> (<A2Z>)+("-" (<A2ZN>)+)* >
	|  <#A2Z: ["a"-"z","A"-"Z"]>
	|  <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]>
	}


	// Brackets and separators. NIL and ANON recognize an empty "()" / "[]"
	// pair, possibly containing whitespace or comments, as one token.
	TOKEN :
	{
	  < LPAREN:    "(" >
	| < RPAREN:    ")" >

	| <NIL:        <LPAREN> (<WS>|<SINGLE_LINE_COMMENT>)* <RPAREN> >

	| < LBRACE:    "{" >
	| < RBRACE:    "}" >

	| < LBRACKET:  "[" >
	| < RBRACKET:  "]" >
	| < ANON:      <LBRACKET> (<WS>|<SINGLE_LINE_COMMENT>)* <RBRACKET> >

	| < SEMICOLON: ";" >
	| < COMMA:     "," >
	| < DOT:       "." >
	}

	// Operator

	// Operator and miscellaneous single-purpose tokens. Of these, the visible
	// productions use EQ, ARROW, BOM and DATATYPE; AT is used inside LANGTAG.
	// NOTE(review): PNAME_NS is declared earlier and also matches a lone ":",
	// so COLON looks unreachable as a token -- confirm.
	TOKEN :
	{
	  < EQ:      "=" >
	| <ARROW:    "=>">

	| < DOLLAR:  "$">
	| < QMARK:   "?">

	| < TILDE:   "~" >
	| < COLON:   ":" >

	// | < PLUS:    "+" >
	// | < MINUS:   "-" >
	| < STAR:    "*" >
	| < SLASH:   "/" >
	| < RSLASH:   "\\" >
	| < BOM:     "\uFEFF">

	//| < AMP: "&" >
	//| < REM: "%" >

	| < DATATYPE: "^^">
	| < AT: "@">
	}

	// Private (#) character classes and name patterns used to build PNAME_NS,
	// PNAME_LN, BLANK_NODE_LABEL and VAR. None of them is a token by itself.
	TOKEN:
	{
	  <#PN_CHARS_BASE:
	          ["A"-"Z"] | ["a"-"z"] |
	          ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] |
	          ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] |
	          ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] |
	          ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFD"]
	          >
	          // [#x10000-#xEFFFF]
	          // (the supplementary range above is not included in the pattern)
	|
	  <#PN_CHARS_U: <PN_CHARS_BASE> | "_" >
	|
	// No DOT
	  <#PN_CHARS: (<PN_CHARS_U> | "-" | ["0"-"9"] | "\u00B7" |
	              ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] ) >
	|
	  // No leading "_", no trailing ".", can have dot inside prefix name.
	  <#PN_PREFIX: <PN_CHARS_BASE> ((<PN_CHARS>|".")* <PN_CHARS>)? >
	|
	  // With a leading "_", no dot at end of local name.
	  <#PN_LOCAL: (<PN_CHARS_U> | ["0"-"9"]) ((<PN_CHARS>|".")* <PN_CHARS>)? >
	|
	  // NCNAME without "-" and ".", allowing leading digits.
	  <#VARNAME: ( <PN_CHARS_U> | ["0"-"9"] )
	             ( <PN_CHARS_U> | ["0"-"9"] | "\u00B7" |
	               ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] )* >
	}

	// Catch-all tokens. Must be last.
	// Any non-whitespace. Causes a parser exception, rather than a
	// token manager error (with hidden line numbers).
	// Only bad IRIs (e.g. spaces) now give unhelpful parse errors.
	// NOTE(review): UNKNOWN is declared private (#). A private definition can
	// only be referenced from other token definitions and is never produced as
	// a token itself, so confirm whether the catch-all behaviour described for
	// this block is actually in effect.
	TOKEN:
	{
	<#UNKNOWN: (~[" ","\t","\n","\r","\f" ])+ >
	}

	/*
	# Local Variables:
	# tab-width: 4
	# indent-tabs-mode: nil
	# comment-default-style: "//"
	# End:
	*/