blob: 2fcba50ce4543e9a1673b3e653045e1c496cba3c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Turtle & N3 for Jena
// N3 compatible:
// Projection to Turtle is done by filtering non-Turtle forms.
// A consequence is that where Turtle and N3 are incompatible (e.g.
// whitespace processing) it follows N3.
//
// All legal Turtle documents are accepted by this grammar.
// JavaCC generator options for this grammar.
options
{
// Use \ u escapes in streams AND use a reader for the query
// => get both raw and escaped unicode
JAVA_UNICODE_ESCAPE = true ;
// We use a UTF-8 encoded stream anyway so the setting of
// this is not important.
// and it does not make any difference to the code generated!
// Only a warning is issued if true.
UNICODE_INPUT = false ;
// Generate instance (non-static) parser and token-manager members.
STATIC = false ;
// Uncomment to have the generated parser / token manager trace their actions.
// DEBUG_PARSER = true ;
// DEBUG_TOKEN_MANAGER = true ;
}
PARSER_BEGIN(TurtleParser)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hp.hpl.jena.n3.turtle.parser ;
import com.hp.hpl.jena.n3.turtle.ParserBase ;
import com.hp.hpl.jena.graph.* ;
// Parser class that JavaCC fills in from the productions below.
// The class body is intentionally empty: the helpers and constants used by
// the semantic actions in this grammar (emitTriple, setPrefix, createBNode,
// nRDFtype, strictTurtle, ...) are not declared in this file, so they are
// presumably inherited from ParserBase.
public class TurtleParser extends ParserBase
{
}
PARSER_END(TurtleParser)
// --- Entry point
// Top-level production: any number of statements, then end of input.
void parse() : { }
{
    ( Statement() )*
    <EOF>
}
// A single statement: either a directive or a group of triples that share
// a subject. In both cases the statement must be closed by a ".".
void Statement() : { }
{
    (
        Directive()
      |
        TriplesSameSubject()
    )
    <DOT>
}
// Parses a prefix declaration:  @prefix <ns-token> <iri>
// The prefix text is cleaned up with fixupPrefix and then registered through
// setPrefix together with the URI of the IRI node.
// Only the <PREFIX> keyword is handled; this production has no alternative
// for <BASE>.
void Directive() : { Token prefixTok ; Node iri ; }
{
    // Need a token for @prefix?
    // Looks like a LANGTAG
    <PREFIX>
    prefixTok = <QNAME_NS>
    iri = Q_IRI_REF()
    {
        String prefix = fixupPrefix(prefixTok.image, prefixTok.beginLine, prefixTok.beginColumn) ;
        setPrefix(prefixTok.beginLine, prefixTok.beginColumn, prefix, iri.getURI()) ;
    }
}
// -------- PATH
// Path operator tokens.
// None of these is referenced by a production in the grammar as shown here,
// but they are still recognised by the token manager. Because they are the
// first TOKEN definitions in the file they also receive the lowest token
// kind numbers, so their order should not be changed casually.
TOKEN:
{
< PLING: "!" >
| < VBAR: "|" >
| < CARROT: "^" >
| < FPATH: "->" >
| < RPATH: "<-" >
}
// N3
// ---- TRIPLES
// <<<<< SPARQL extract
// Parses the triples that hang off one subject.
//  - A plain term (variable, graph term or formula) must be followed by a
//    non-empty property list.
//  - A triple-generating node (collection or "[ ... ]") may stand alone,
//    so its property list is optional.
void TriplesSameSubject() : { Node subject ; }
{
    (
        subject = VarOrTerm()
        PropertyListNotEmpty(subject)
      |
        // Any of the triple generating syntax elements
        subject = TriplesNode()
        PropertyList(subject)
    )
}
// Optional property list for subject s: either nothing, or a full
// PropertyListNotEmpty.
void PropertyList(Node s) : { }
{
    [ PropertyListNotEmpty(s) ]
}
// >>>>> SPARQL extract
// Non-recursive for Turtle long PropertyList tests
// Parses "verb objects ( ; verb objects )*" for subject s.
// A ";" may be left dangling (not followed by another verb), and the loop is
// written iteratively rather than recursively so long lists do not deepen
// the call stack.
void PropertyListNotEmpty(Node s) : { Node verb ; }
{
    verb = Verb()
    ObjectList(s, verb)
    (
        <SEMICOLON>
        [ verb = Verb() ObjectList(s, verb) ]
    )*
}
// Non-recursive for Turtle long PropertyList tests
// Parses one or more comma-separated objects for the given subject s and
// predicate p. Each object is handled (and its triple emitted) by Object().
// No local state is needed here, so the declaration section is empty.
void ObjectList(Node s, Node p) : { }
{
    Object(s, p)
    ( <COMMA> Object(s, p) )*
}
// Parses a single object node and emits the triple (s, p, object).
// The position passed to emitTriple is that of the most recently consumed
// token, i.e. the last token of the object.
void Object(Node s, Node p) : { Node obj ; }
{
    obj = GraphNode()
    {
        Triple triple = new Triple(s, p, obj) ;
        emitTriple(token.beginLine, token.beginColumn, triple) ;
    }
}
// <<<<< SPARQL extract
// Parses the predicate position of a triple and returns its node:
//   - an IRI (absolute reference or qname),
//   - the keyword "a", which maps to nRDFtype,
//   - "=>", which maps to nLogImplies. When strictTurtle is set,
//     raiseException is called with the position of the "=>" token because
//     this form is not part of Turtle.
Node Verb() : { Node p ; }
{
    (
        p = IRIref()
      |
        <KW_A> { p = nRDFtype ; }
      |
        <ARROW>
        { p = nLogImplies ;
          if ( strictTurtle )
              raiseException("=> (log:implies) not legal in Turtle",
                             token.beginLine, token.beginColumn ) ;
        }
    )
    { return p ; }
}
// -------- Triple expansions
// Anything that can stand in a node slot and which is
// a number of triples
// A node whose syntax itself generates triples: an RDF collection "( ... )"
// or a blank node property list "[ ... ]". Returns the node that stands for
// the whole construct.
Node TriplesNode() : { Node node ; }
{
    (
        node = Collection()
      |
        node = BlankNodePropertyList()
    )
    { return node ; }
}
// Parses "[ verb objects ... ]".
// A fresh blank node is created once "[" has been consumed; it is used as
// the subject of the enclosed (non-empty) property list and then returned.
Node BlankNodePropertyList() : { Node bnode ; }
{
    <LBRACKET>
    { bnode = createBNode() ; }
    PropertyListNotEmpty(bnode)
    <RBRACKET>
    { return bnode ; }
}
// ------- RDF collections
// Code not as SPARQL/ARQ because of output ordering.
// Parses a non-empty collection "( item item ... )" and emits its list
// structure as triples, in this order for every item:
//   1. (previous cell, nRDFrest, new cell)   -- skipped for the first item
//   2. (new cell, nRDFfirst, item)           -- after the item was parsed
// and finally (last cell, nRDFrest, nRDFnil) once ")" has been consumed.
// Returns the first cell. The statement order is deliberate because it
// determines the order in which triples are emitted.
Node Collection() :
{ Node head = nRDFnil ; Node previous = null ; Node item ; }
{
    <LPAREN>
    (
        { Node current = createBNode() ;
          if ( head == nRDFnil )
              head = current ;
          if ( previous != null )
              emitTriple(token.beginLine, token.beginColumn,
                         new Triple(previous, nRDFrest, current)) ;
        }
        item = GraphNode()
        {
          emitTriple(token.beginLine, token.beginColumn,
                     new Triple(current, nRDFfirst, item)) ;
          previous = current ;
        }
    )+
    // Not * here - "()" is handled separately.
    <RPAREN>
    { if ( previous != null )
          emitTriple(token.beginLine, token.beginColumn,
                     new Triple(previous, nRDFrest, nRDFnil)) ;
      return head ; }
}
// -------- Nodes in a graph pattern or template
// Any node that may appear in object position or as a collection item:
// a plain term, or a triple-generating node.
Node GraphNode() : { Node node ; }
{
    (
        node = VarOrTerm()
      |
        node = TriplesNode()
    )
    { return node ; }
}
// A variable, a graph term, or a formula. Returns whatever the chosen
// sub-production returns (Formula() returns null).
Node VarOrTerm() : { Node term ; }
{
    term = Var()
    { return term ; }
  |
    term = GraphTerm()
    { return term ; }
  |
    term = Formula()
    { return term ; }
}
// Parses "{ triples ( . triples? )* }".
// startFormula / endFormula are called with the positions of the opening and
// closing braces. No node is built for the formula: the production always
// returns null.
Node Formula() : { Token brace ; }
{
    brace = <LBRACE>
    { startFormula(brace.beginLine, brace.beginColumn) ; }
    // Need to sort this out and merge with Statement above
    TriplesSameSubject()
    (
        <DOT>
        [ TriplesSameSubject() ]
    )*
    brace = <RBRACE>
    { endFormula(brace.beginLine, brace.beginColumn) ; }
    { return null ; }
}
// >>>>> SPARQL extract
// A variable written as ?name (VAR1) or $name (VAR2). The node is built by
// createVariable from the full token text and the token position.
Node Var() : { Token varTok ; }
{
    (
        varTok = <VAR1>
      |
        varTok = <VAR2>
    )
    { return createVariable(varTok.image, varTok.beginLine, varTok.beginColumn) ; }
}
// A concrete RDF term: IRI, string literal, numeric literal, blank node, or
// the empty collection "()" (token NIL), which maps to nRDFnil.
// Boolean literals are not accepted (the alternative is disabled).
Node GraphTerm() : { Node term ; }
{
    (
        term = IRIref()
      |
        term = RDFLiteral()
      |
        // Cleaner sign handling in Turtle.
        term = NumericLiteral()
      // |
      //   term = BooleanLiteral()
      |
        term = BlankNode()
      |
        // <LPAREN> <RPAREN> { return nRDFnil ; }
        <NIL> { term = nRDFnil ; }
    )
    { return term ; }
}
// ---- Basic terms
// A numeric literal. The lexical form (including any sign) is handed to the
// node factory that matches the token kind: integer, decimal or double.
Node NumericLiteral() : { Token num ; }
{
    num = <INTEGER>
    { return makeNodeInteger(num.image) ; }
  |
    num = <DECIMAL>
    { return makeNodeDecimal(num.image) ; }
  |
    num = <DOUBLE>
    { return makeNodeDouble(num.image) ; }
}
// >>>>> SPARQL extract
// Langtag oddity.
// Parses a string literal with an optional language tag or an optional
// "^^" datatype IRI (never both) and builds the node with makeNode.
// lang and uri stay null when the corresponding part is absent.
Node RDFLiteral() : { String lex ; String lang = null ; Node uri = null ; }
{
    lex = String()
    // Optional lang tag and datatype.
    (
        lang = Langtag()
      |
        ( <DATATYPE> uri = IRIref() )
    )?
    { return makeNode(lex, lang, uri) ; }
}
// Parses a language tag and returns the token text after stripChars(image, 1)
// (presumably this drops the leading "@").
//
// The directive keywords "@prefix" and "@base" are declared as tokens ahead
// of LANGTAG, so when that text follows a string it arrives as a PREFIX or
// BASE token, not as a LANGTAG. Every directive keyword therefore has to be
// enumerated here; without <BASE> a literal such as "x"@base cannot be
// parsed.
String Langtag() : { Token t ; }
{
    // Enumerate the directives here
    ( t = <LANGTAG> | t = <PREFIX> | t = <BASE> )
    { String lang = stripChars(t.image, 1) ; return lang ; }
}
// >>>>> SPARQL extract
// Node BooleanLiteral() : {}
// {
// <TRUE> { return XSD_TRUE ; }
// |
// <FALSE> { return XSD_FALSE ; }
// }
// <<<<< SPARQL extract
// Parses any of the four string forms and returns its unescaped content.
// Short strings have one quote character stripped from each end
// (stripQuotes), long strings have three (stripQuotes3); the result is then
// passed through unescapeStr together with the token position.
String String() : { Token strTok ; String text ; }
{
    (
        strTok = <STRING_LITERAL1>
        { text = stripQuotes(strTok.image) ; }
      |
        strTok = <STRING_LITERAL2>
        { text = stripQuotes(strTok.image) ; }
      |
        strTok = <STRING_LITERAL_LONG1>
        { text = stripQuotes3(strTok.image) ; }
      |
        strTok = <STRING_LITERAL_LONG2>
        { text = stripQuotes3(strTok.image) ; }
    )
    { return unescapeStr(text, strTok.beginLine, strTok.beginColumn) ; }
}
// An IRI given either in angle brackets or as a qname.
Node IRIref() : { Node iri ; }
{
    (
        iri = Q_IRI_REF()
      |
        iri = QName()
    )
    { return iri ; }
}
// A qname, with (QNAME) or without (QNAME_NS) a local part. Both kinds are
// resolved the same way, by createURIfromQName.
Node QName() : { Token qn ; }
{
    (
        qn = <QNAME>
      |
        qn = <QNAME_NS>
    )
    { return createURIfromQName(qn.image, qn.beginLine, qn.beginColumn) ; }
}
// A blank node: either labelled ("_:name"), created from the label text and
// position, or anonymous ("[]", token ANON), created without arguments.
Node BlankNode() : { Token label ; }
{
    label = <BLANK_NODE_LABEL>
    { return createBNode(label.image, label.beginLine, label.beginColumn) ; }
  |
    // <LBRACKET> <RBRACKET> { return createBNode() ; }
    <ANON>
    { return createBNode() ; }
}
// An IRI written in angle brackets. The full token text (brackets included)
// and its position are handed to createNodeFromURI.
Node Q_IRI_REF() : { Token iriTok ; }
{
    iriTok = <Q_IRIref>
    { return createNodeFromURI(iriTok.image, iriTok.beginLine, iriTok.beginColumn) ; }
}
// ------------------------------------------
// Tokens
// Comments and whitespace
// Whitespace between tokens is discarded.
SKIP : { " " | "\t" | "\n" | "\r" | "\f" }
// Private (#) copy of the whitespace set, usable inside other token
// definitions (see NIL and ANON).
TOKEN: { <#WS: " " | "\t" | "\n" | "\r" | "\f"> }
// A comment runs from "#" to the end of the line (the line terminator is
// optional so a comment may end the input). As a SPECIAL_TOKEN it is not
// passed to the parser as a regular token.
SPECIAL_TOKEN :
{ <SINGLE_LINE_COMMENT: "#" (~["\n","\r"])* ("\n"|"\r"|"\r\n")? > }
// -------------------------------------------------
// Keywords : directives before LANGTAG
// The keyword "a" (case-sensitive). It is declared ahead of HEX, which also
// matches the single character "a"; for equal-length matches the earlier
// declaration wins.
TOKEN : { <KW_A: "a" > } // Before HEX rule!
// Directive keywords, matched case-insensitively. They are declared ahead of
// LANGTAG, whose pattern matches the same text, so that the keyword token is
// the one produced.
TOKEN [IGNORE_CASE] :
{
// Prologue
< PREFIX: "@prefix" >
| < BASE: "@base" >
//| < TRUE: "true" >
//| < FALSE: "false" >
}
// -------------------------------------------------
// Numeric and string literal tokens.
// The order of the definitions matters: among matches of equal length the
// earliest definition wins (e.g. INTEGER is declared before DIGITS).
TOKEN :
{
// Optionally signed run of digits.
< INTEGER: (["-","+"])? <DIGITS> >
|
// Optionally signed number containing a decimal point. Digits after the
// point are optional when digits precede it.
// NOTE(review): this means text such as "1." is matched as a single DECIMAL
// (longest match), not as INTEGER followed by DOT - confirm this is the
// intended N3-style behaviour.
< DECIMAL: (["-","+"])?
((<DIGITS>)+ "." (<DIGITS>)* | "." (<DIGITS>)+)
>
// Required exponent.
| < DOUBLE:
(["+","-"])?
(
(["0"-"9"])+ "." (["0"-"9"])* <EXPONENT>
| "." (["0"-"9"])+ (<EXPONENT>)
| (["0"-"9"])+ <EXPONENT>
)
>
// Optional exponent.
// | < DOUBLE:
// //(["+","-"])?
// (["0"-"9"])+ "." (["0"-"9"])* (<EXPONENT>)?
// | "." (["0"-"9"])+ (<EXPONENT>)?
// | (["0"-"9"])+ <EXPONENT>
// >
// Private helpers: exponent part and the triple-quote delimiters.
| < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >
| < #QUOTE_3D: "\"\"\"">
| < #QUOTE_3S: "'''">
// | <UCHAR: ("u" <HEX> <HEX> <HEX> <HEX> )
// | ("U" <HEX> <HEX> <HEX> <HEX> <HEX> <HEX> <HEX> <HEX>)>
// <ECHAR: ("t"|"b"|"n"|"r"|"f"|"\\"|"\""|"'")>
//
// |
// <XCHAR: "\\" ( <ECHAR> | <UCHAR> ) >
// Backslash escape allowed inside strings.
// NOTE(review): ECHAR (like DIGITS and HEX below) is declared without "#",
// so it is also a token kind in its own right - confirm that is intended.
| <ECHAR: "\\" ("t"|"b"|"n"|"r"|"f"|"\\"|"\""|"'")>
| < STRING_LITERAL1:
// Single quoted string
"'" ( (~["'","\\","\n","\r"]) | <ECHAR> )* "'" >
| < STRING_LITERAL2:
// Double quoted string
"\"" ( (~["\"","\\","\n","\r"]) | <ECHAR> )* "\"" >
// Long strings: triple-quoted, may span lines, and may contain one or two
// consecutive quote characters as long as they are not followed by another.
| < STRING_LITERAL_LONG1:
<QUOTE_3S>
( ~["'","\\"] | <ECHAR> | ("'" ~["'"]) | ("''" ~["'"]))*
<QUOTE_3S> >
| < STRING_LITERAL_LONG2:
<QUOTE_3D>
( ~["\"","\\"] | <ECHAR> | ("\"" ~["\""]) | ("\"\"" ~["\""]))*
<QUOTE_3D> >
| < DIGITS: (["0"-"9"])+>
| <HEX: ["0"-"9"] | ["A"-"F"] | ["a"-"f"]>
}
// IRIs, qnames, blank node labels, variables and language tags.
TOKEN:
{
// Includes # for relative URIs
// "<" ... ">" containing no "<", ">" or characters U+0000 to U+0020.
<Q_IRIref: "<" (~[">","<","\u0000"-"\u0020"])* ">" >
// Namespace part only: optional prefix followed by ":". Declared before
// QNAME so that text with no local part is reported as QNAME_NS.
| <QNAME_NS: (<NCNAME_PREFIX>)? ":" >
| <QNAME: (<NCNAME_PREFIX>)? ":" (<NCNAME>)? >
| <BLANK_NODE_LABEL: "_:" <NCNAME> >
//| <BLANK_NODE_ID: "_!:" <NCNAME> >
// Variables: "?" or "$" followed by a variable name.
| <VAR1: <QMARK> <VARNAME> >
| <VAR2: <DOLLAR> <VARNAME> >
// "@" letters, then any number of "-" alphanumeric groups.
| <LANGTAG: <AT> (<A2Z>)+("-" (<A2ZN>)+)* >
| <#A2Z: ["a"-"z","A"-"Z"]>
| <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]>
}
// Brackets and separators.
TOKEN :
{
< LPAREN: "(" >
| < RPAREN: ")" >
// Empty collection: "(" and ")" with only whitespace and/or comments between.
| <NIL: <LPAREN> (<WS>|<SINGLE_LINE_COMMENT>)* <RPAREN> >
| < LBRACE: "{" >
| < RBRACE: "}" >
| < LBRACKET: "[" >
| < RBRACKET: "]" >
// Anonymous blank node: "[" and "]" with only whitespace and/or comments between.
| < ANON: <LBRACKET> (<WS>|<SINGLE_LINE_COMMENT>)* <RBRACKET> >
| < SEMICOLON: ";" >
| < COMMA: "," >
| < DOT: "." >
}
// Operator
// Operator-like tokens.
// In the grammar as shown, productions reference only ARROW and DATATYPE;
// DOLLAR, QMARK and AT are used inside other token definitions (VAR2, VAR1,
// LANGTAG). The remaining tokens are declared but not referenced.
TOKEN :
{
< EQ: "=" >
//| < NE: "!=" >
//| < GT: ">" >
//| < LT: "<" >
//| < LE: "<=" > // Maybe: | "=>" >
//| < GE: ">=" > // Maybe: | "=<" >
| <ARROW: "=>">
| < DOLLAR: "$">
| < QMARK: "?">
| < TILDE: "~" >
// NOTE(review): QNAME_NS is declared earlier and also matches a lone ":",
// so COLON appears to be unreachable - confirm.
| < COLON: ":" >
// | < PLUS: "+" >
// | < MINUS: "-" >
| < STAR: "*" >
| < SLASH: "/" >
| < RSLASH: "\\" >
//| < AMP: "&" >
//| < REM: "%" >
| < DATATYPE: "^^">
| < AT: "@">
}
// Private (#) character classes and name patterns used by the qname, blank
// node label and variable tokens. None of them is a token in its own right.
TOKEN:
{
// XML 1.1 NCNameStartChar without "_"
<#NCCHAR1p:
["A"-"Z"] | ["a"-"z"] |
["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] |
["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] |
["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] |
["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFD"] >
// [#x10000-#xEFFFF]
|
// Name start character including "_".
<#NCCHAR1: <NCCHAR1p> | "_" >
// No trailing DOTs in qnames.
|
// #NCCHAR without "."
<#NCCHAR: (<NCCHAR1> | "-" | ["0"-"9"] | "\u00B7" |
["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] ) >
|
// NCNAME but no leading "_", no trailing ".", can have dot inside prefix name.
<#NCNAME_PREFIX: <NCCHAR1p> ((<NCCHAR>|".")* <NCCHAR>)? >
|
// With a leading "_", no dot at end of local name.
<#NCNAME: <NCCHAR1> ((<NCCHAR>|".")* <NCCHAR>)? >
|
// NCNAME without "-" and ".", allowing leading digits.
<#VARNAME: ( <NCCHAR1> | ["0"-"9"] )
( <NCCHAR1> | ["0"-"9"] | "\u00B7" |
["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] )* >
}
/*
# Local Variables:
# tab-width: 4
# indent-tabs-mode: nil
# comment-default-style: "//"
# End:
*/