blob: a324e94115d76f6f2d8dfd1f28faf8522dbd0cf8 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Turtle & N3 for Jena
// N3 compatible:
// All legal Turtle documents are accepted by this grammar.
// Some N3 features, which are still RDF, are provided.
// JavaCC generator options for this grammar.
options
{
// Use \ u escapes in streams AND use a reader for the query
// => get both raw and escaped unicode
JAVA_UNICODE_ESCAPE = true ;
UNICODE_INPUT = false ;
// Generate instance (non-static) parser members.
STATIC = false ;
// Uncomment either line below to have the generated code trace its work.
// DEBUG_PARSER = true ;
// DEBUG_TOKEN_MANAGER = true ;
}
PARSER_BEGIN(TurtleParser)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hp.hpl.jena.n3.turtle.parser ;
import com.hp.hpl.jena.n3.turtle.ParserBase ;
import com.hp.hpl.jena.graph.* ;
/**
 * Parser class that JavaCC fills in from the productions of this grammar.
 * The class body is deliberately empty here. The semantic actions below call
 * helpers and constants (for example createNode, emitTriple, nRDFtype) that
 * are not defined in this file; presumably they are inherited from
 * ParserBase - confirm against that class.
 */
public class TurtleParser extends ParserBase
{
}
PARSER_END(TurtleParser)
// --- Entry point
// Start production: an optional byte order mark, then any number of
// statements, then end of input.
void parse() : {}
{
  [ <BOM> ]
  ( Statement() )*
  <EOF>
}
// A single statement: either a directive or a group of triples sharing a
// subject, in both cases terminated by ".".
void Statement() : {}
{
  (
    Directive()
  |
    TriplesSameSubject()
  )
  <DOT>
}
// Prologue directives.
//   @prefix <PNAME_NS> <iri> : the prefix image is passed through fixupPrefix
//                              and then registered with setPrefix.
//   @base <iri>              : handed to setBase.
// Line/column arguments come from the PNAME_NS token (prefix case) or from
// the @base keyword token (base case).
void Directive() : { Token tok ; String target ; }
{
  // The @prefix keyword has the same shape as a LANGTAG token.
  <PREFIX> tok = <PNAME_NS> target = IRI_REF()
  {
    String prefix = fixupPrefix(tok.image, tok.beginLine, tok.beginColumn) ;
    setPrefix(tok.beginLine, tok.beginColumn, prefix, target) ;
  }
|
  tok = <BASE> target = IRI_REF()
  {
    setBase(target, tok.beginLine, tok.beginColumn) ;
  }
}
// -------- PATH
// Path operator tokens. None of the productions visible in this grammar
// refers to them; they only affect how the input is tokenised.
TOKEN:
{
< PLING: "!" >
| < VBAR: "|" >
| < CARROT: "^" >
| < FPATH: "->" >
| < RPATH: "<-" >
}
// ---- TRIPLES
// A subject followed by its predicate/object pairs.
void TriplesSameSubject() : { Node subj ; }
{
  // Simple subject: a property list is mandatory. Whether the resulting
  // triple is valid is left to emitTriple.
  subj = VarOrTerm()
  PropertyListNotEmpty(subj)
|
  // Subject written with triple-generating syntax (collection or [...]):
  // the property list may be omitted.
  subj = TriplesNode()
  PropertyList(subj)
}
// Optional property list for subject s; matches nothing when absent.
void PropertyList(Node s) : { }
{
  [ PropertyListNotEmpty(s) ]
}
// Written as a loop (not recursion) so the Turtle tests with very long
// property lists do not nest deeply.
// One verb/object-list pair, then any number of ";" separators, each of
// which may be followed by a further verb/object-list pair.
void PropertyListNotEmpty(Node s) : { Node pred ; }
{
  pred = Verb()
  ObjectList(s, pred)
  (
    <SEMICOLON>
    [ pred = Verb() ObjectList(s, pred) ]
  )*
}
// Non-recursive for Turtle long PropertyList tests
// One or more comma-separated objects for subject s and predicate p.
// Each object is passed to Object(s, p), which emits the triple; this
// production needs no locals of its own.
void ObjectList(Node s, Node p): { }
{
  Object(s, p)
  ( <COMMA> Object(s, p) )*
}
// Parses one object node and emits the triple (s, p, object).
// The position passed to emitTriple is that of the current token, i.e. the
// last token consumed while parsing the object.
void Object(Node s, Node p): { Node obj ; }
{
  obj = GraphNode()
  {
    emitTriple(token.beginLine, token.beginColumn, new Triple(s, p, obj)) ;
  }
}
// The predicate position. Accepts:
//   an IRI or prefixed name  -> node created from the IRI
//   "a"                      -> nRDFtype
//   "="                      -> nOwlSameAs   (reported as an error when strictTurtle is set)
//   "=>"                     -> nLogImplies  (reported as an error when strictTurtle is set)
Node Verb() : { Node verb ; String uri ; }
{
  (
    uri = IRIref()
    { verb = createNode(uri) ; }
  |
    <KW_A>
    { verb = nRDFtype ; }
  |
    <EQ>
    {
      if ( strictTurtle )
        throwParseException("= (owl:sameAs) not legal in Turtle",
                            token.beginLine, token.beginColumn ) ;
      verb = nOwlSameAs ;
    }
  |
    <ARROW>
    {
      if ( strictTurtle )
        throwParseException("=> (log:implies) not legal in Turtle",
                            token.beginLine, token.beginColumn ) ;
      verb = nLogImplies ;
    }
  )
  { return verb ; }
}
// -------- Triple expansions
// Anything that can stand in a node slot and which is
// a number of triples
// A node written with syntax that itself generates triples: a collection
// "( ... )" or a blank node property list "[ ... ]". Returns the node that
// stands for the whole construct.
Node TriplesNode() : { Node node ; }
{
  (
    node = Collection()
  |
    node = BlankNodePropertyList()
  )
  { return node ; }
}
// "[ verb objects ; ... ]": creates a fresh blank node once "[" has been
// consumed, parses the enclosed property list with that node as subject,
// and returns the node.
Node BlankNodePropertyList() : { Node bnode ; }
{
  <LBRACKET>
  { bnode = createBNode() ; }
  PropertyListNotEmpty(bnode)
  <RBRACKET>
  { return bnode ; }
}
// ------- RDF collections
// Code not as SPARQL/ARQ because of output ordering.
// "( item item ... )" with at least one item.
// For every item a fresh blank node cell is created BEFORE the item is
// parsed; the previous cell (if any) is linked to it with nRDFrest, and
// after the item is parsed the cell gets its nRDFfirst triple. After ")"
// the last cell is closed with nRDFrest -> nRDFnil.
// Returns the first cell.
Node Collection() :
{ Node head = nRDFnil ; Node prev = null ; Node cell ; Node item ; }
{
  <LPAREN>
  (
    {
      cell = createBNode() ;
      // First iteration only: remember the head of the list.
      if ( head == nRDFnil )
        head = cell ;
      // Later iterations: chain the previous cell to this one.
      if ( prev != null )
        emitTriple(token.beginLine, token.beginColumn,
                   new Triple(prev, nRDFrest, cell)) ;
    }
    item = GraphNode()
    {
      emitTriple(token.beginLine, token.beginColumn,
                 new Triple(cell, nRDFfirst, item)) ;
      prev = cell ;
    }
  )+
  // One-or-more, not zero-or-more: the empty list "()" is the NIL token.
  <RPAREN>
  {
    if ( prev != null )
      emitTriple(token.beginLine, token.beginColumn,
                 new Triple(prev, nRDFrest, nRDFnil)) ;
    return head ;
  }
}
// -------- Nodes in a graph pattern or template
// Anything allowed in a node slot: a variable/term/formula, or a
// triple-generating construct.
Node GraphNode() : { Node node ; }
{
  (
    node = VarOrTerm()
  |
    node = TriplesNode()
  )
  { return node ; }
}
// A variable, a graph term, or a formula; returns whatever the chosen
// sub-production returns.
Node VarOrTerm() : { Node term ; }
{
  term = Var()       { return term ; }
|
  term = GraphTerm() { return term ; }
|
  term = Formula()   { return term ; }
}
// "{ triples . triples . ... }".
// startFormula is called at the opening brace and endFormula at the closing
// brace, each with the brace's position. No node is built for the formula:
// this production always returns null.
Node Formula() : { Token brace ; }
{
  brace = <LBRACE>
  { startFormula(brace.beginLine, brace.beginColumn) ; }
  TriplesSameSubject()
  ( <DOT> [ TriplesSameSubject() ] )*
  brace = <RBRACE>
  {
    endFormula(brace.beginLine, brace.beginColumn) ;
    return null ;
  }
}
// A "?name" variable; the node is built by createVariable from the token
// image and its position.
Node Var() : { Token varTok ; }
{
  varTok = <VAR>
  {
    return createVariable(varTok.image, varTok.beginLine, varTok.beginColumn) ;
  }
}
// A term that is a single node: IRI, literal (string, numeric, boolean),
// blank node, or the empty list.
Node GraphTerm() : { Node term ; String uri ; }
{
  uri = IRIref()
  { return createNode(uri) ; }
|
  (
    term = RDFLiteral()
  |
    // Signs are part of the numeric tokens themselves.
    term = NumericLiteral()
  |
    term = BooleanLiteral()
  |
    term = BlankNode()
  )
  { return term ; }
|
  // "()" arrives as the single NIL token, not as LPAREN followed by RPAREN.
  <NIL>
  { return nRDFnil ; }
}
// ---- Basic terms
// Integer, decimal or double literal; the token image is passed unchanged
// to the matching createLiteral* helper.
Node NumericLiteral() : { Token num ; }
{
  num = <INTEGER>
  { return createLiteralInteger(num.image) ; }
|
  num = <DECIMAL>
  { return createLiteralDecimal(num.image) ; }
|
  num = <DOUBLE>
  { return createLiteralDouble(num.image) ; }
}
// A string literal optionally followed by either a language tag or
// "^^" and a datatype IRI (the grammar allows at most one of the two).
// Returns the node built by createLiteral(lex, lang, dt); lang and dt are
// null when the corresponding part is absent.
Node RDFLiteral() : { String lex = null ; }
{
  lex = String()
  // Optional lang tag and datatype.
  { String lang = null ; String dt = null ; }
  (
    lang = Langtag()
  |
    ( <DATATYPE> dt = IRIref() )
  )?
  { return createLiteral(lex, lang, dt) ; }
}
// A language tag. The directive keywords are accepted here as well because
// they have the same shape as a language tag. The returned string is the
// token image passed through stripChars(image, 1).
String Langtag() : { Token tag ; }
{
  (
    tag = <LANGTAG>
  |
    tag = AnyDirective()
  )
  { return stripChars(tag.image, 1) ; }
}
// Either directive keyword token, returned as-is.
Token AnyDirective() : { Token kw ; }
{
  kw = <PREFIX> { return kw ; }
|
  kw = <BASE>   { return kw ; }
}
// "true" or "false", mapped to XSD_TRUE / XSD_FALSE respectively.
Node BooleanLiteral() : { Node bool ; }
{
  (
    <TRUE>  { bool = XSD_TRUE ; }
  |
    <FALSE> { bool = XSD_FALSE ; }
  )
  { return bool ; }
}
// Any of the four string forms. Short strings go through stripQuotes and
// long (triple-quoted) strings through stripQuotes3; the result is then
// passed to unescapeStr together with the token position.
String String() : { Token str ; String text ; }
{
  (
    ( str = <STRING_LITERAL1> | str = <STRING_LITERAL2> )
    { text = stripQuotes(str.image) ; }
  |
    ( str = <STRING_LITERAL_LONG1> | str = <STRING_LITERAL_LONG2> )
    { text = stripQuotes3(str.image) ; }
  )
  { return unescapeStr(text, str.beginLine, str.beginColumn) ; }
}
// An IRI given either in angle brackets or as a prefixed name; returns the
// string produced by the chosen sub-production.
String IRIref() : { String uri ; }
{
  (
    uri = IRI_REF()
  |
    uri = PrefixedName()
  )
  { return uri ; }
}
// "prefix:local" or a bare "prefix:"; both are expanded by resolvePName
// using the token image and position.
String PrefixedName() : { Token name ; }
{
  (
    name = <PNAME_LN>
  |
    name = <PNAME_NS>
  )
  { return resolvePName(name.image, name.beginLine, name.beginColumn) ; }
}
// A labelled blank node ("_:x") or an anonymous one ("[]").
Node BlankNode() : { Token label = null ; }
{
  label = <BLANK_NODE_LABEL>
  {
    return createBNode(label.image, label.beginLine, label.beginColumn) ;
  }
|
  // "[]" arrives as the single ANON token, not as LBRACKET then RBRACKET.
  <ANON>
  { return createBNode() ; }
}
// An IRI in angle brackets; the token image and position are passed to
// resolveQuotedIRI and its result is returned.
String IRI_REF() : { Token ref ; }
{
  ref = <IRIref>
  {
    return resolveQuotedIRI(ref.image, ref.beginLine, ref.beginColumn) ;
  }
}
// ------------------------------------------
// Tokens
// Comments and whitespace
// Whitespace between tokens is discarded.
SKIP : { " " | "\t" | "\n" | "\r" | "\f" }
// Private helper pattern (never a token on its own); referenced by the NIL
// and ANON tokens.
TOKEN: { <#WS: " " | "\t" | "\n" | "\r" | "\f"> }
// A comment runs from "#" up to and including the line end (or to end of
// input). It is a special token, so the productions never see it.
SPECIAL_TOKEN :
{ <SINGLE_LINE_COMMENT: "#" (~["\n","\r"])* ("\n"|"\r"|"\r\n")? > }
// -------------------------------------------------
// Keywords : directives before LANGTAG
TOKEN :
{
// The "a" shorthand; Verb() maps it to nRDFtype.
<KW_A: "a" >
// Prologue
// Both strings also fit the LANGTAG pattern, which is declared further
// down; these declarations come first (see the section heading above).
| < PREFIX: "@prefix" >
| < BASE: "@base" >
}
// Boolean, numeric and string literal tokens. The IGNORE_CASE flag applies
// to every pattern in this block.
TOKEN [IGNORE_CASE] :
{
< TRUE: "true" >
| < FALSE: "false" >
// -------------------------------------------------
// Optional sign followed by digits (DIGITS is declared at the end of this block).
| < INTEGER: (["-","+"])? <DIGITS> >
|
// Optional sign, then digits with a "." (digits after the dot optional),
// or a leading "." followed by digits.
< DECIMAL: (["-","+"])?
((<DIGITS>)+ "." (<DIGITS>)* | "." (<DIGITS>)+)
>
// Required exponent.
| < DOUBLE:
(["+","-"])?
(
(["0"-"9"])+ "." (["0"-"9"])* <EXPONENT>
| "." (["0"-"9"])+ (<EXPONENT>)
| (["0"-"9"])+ <EXPONENT>
)
>
// Private helpers: exponent part and the two triple-quote delimiters.
| < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >
| < #QUOTE_3D: "\"\"\"">
| < #QUOTE_3S: "'''">
// "u" done by javacc input stream.
// "U" escapes not supported yet for Java strings
// Backslash followed by one of the listed escape characters.
| <ECHAR: "\\" ("t"|"b"|"n"|"r"|"f"|"\\"|"\""|"'")>
| < STRING_LITERAL1:
// Single quoted string
"'" ( (~["'","\\","\n","\r"]) | <ECHAR> )* "'" >
| < STRING_LITERAL2:
// Double quoted string
"\"" ( (~["\"","\\","\n","\r"]) | <ECHAR> )* "\"" >
// Triple single-quoted string; inside, one or two quotes are allowed when
// followed by a non-quote character.
| < STRING_LITERAL_LONG1:
<QUOTE_3S>
( ~["'","\\"] | <ECHAR> | ("'" ~["'"]) | ("''" ~["'"]))*
<QUOTE_3S> >
// Triple double-quoted string; same shape as above with double quotes.
| < STRING_LITERAL_LONG2:
<QUOTE_3D>
( ~["\"","\\"] | <ECHAR> | ("\"" ~["\""]) | ("\"\"" ~["\""]))*
<QUOTE_3D> >
// One or more decimal digits. Not private; it is declared after INTEGER,
// whose pattern matches the same unsigned digit strings.
| < DIGITS: (["0"-"9"])+>
// | <HEX: ["0"-"9"] | ["A"-"F"] | ["a"-"f"]>
}
// IRI, prefixed-name, blank-node-label, variable and language-tag tokens.
TOKEN:
{
// Includes # for relative URIs
// "<" ... ">" with the listed punctuation and all characters up to and
// including space excluded from the body.
<IRIref: "<" (~[ ">","<", "\"", "{", "}", "^", "\\", "|", "`",
"\u0000"-"\u0020"])* ">" >
// Optional prefix followed by ":".
| <PNAME_NS: (<PN_PREFIX>)? ":" >
// Prefix part followed directly by a local name.
| <PNAME_LN: <PNAME_NS> <PN_LOCAL> >
| <BLANK_NODE_LABEL: "_:" <PN_LOCAL> >
| <VAR: "?" <VARNAME> >
// "@" + letters, then any number of "-" + alphanumerics subtags.
| <LANGTAG: <AT> (<A2Z>)+("-" (<A2ZN>)+)* >
// Private helpers for LANGTAG.
| <#A2Z: ["a"-"z","A"-"Z"]>
| <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]>
}
// Brackets and separators.
TOKEN :
{
< LPAREN: "(" >
| < RPAREN: ")" >
// "(" and ")" with only whitespace and/or comments between them; used by
// GraphTerm() for the empty list.
| <NIL: <LPAREN> (<WS>|<SINGLE_LINE_COMMENT>)* <RPAREN> >
| < LBRACE: "{" >
| < RBRACE: "}" >
| < LBRACKET: "[" >
| < RBRACKET: "]" >
// "[" and "]" with only whitespace and/or comments between them; used by
// BlankNode() for an anonymous blank node.
| < ANON: <LBRACKET> (<WS>|<SINGLE_LINE_COMMENT>)* <RBRACKET> >
| < SEMICOLON: ";" >
| < COMMA: "," >
| < DOT: "." >
}
// Operator
// Operator-like tokens. Of these, the productions visible in this grammar
// use EQ and ARROW (Verb), BOM (parse) and DATATYPE (RDFLiteral); AT is
// used inside the LANGTAG token. The rest are declared but not referenced
// by any visible production.
TOKEN :
{
< EQ: "=" >
| <ARROW: "=>">
| < DOLLAR: "$">
| < QMARK: "?">
| < TILDE: "~" >
| < COLON: ":" >
// | < PLUS: "+" >
// | < MINUS: "-" >
| < STAR: "*" >
| < SLASH: "/" >
| < RSLASH: "\\" >
// Byte order mark, allowed once at the very start of input by parse().
| < BOM: "\uFEFF">
//| < AMP: "&" >
//| < REM: "%" >
| < DATATYPE: "^^">
| < AT: "@">
}
// Private character classes and name patterns used by the PNAME_NS,
// PNAME_LN, BLANK_NODE_LABEL and VAR tokens. All entries are private (#),
// so none of them is a token on its own.
TOKEN:
{
<#PN_CHARS_BASE:
["A"-"Z"] | ["a"-"z"] |
["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] |
["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] |
["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] |
["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFD"]
>
// [#x10000-#xEFFFF]
// (the supplementary range named on the line above is not part of the
// pattern)
|
// Base characters plus underscore.
<#PN_CHARS_U: <PN_CHARS_BASE> | "_" >
|
// No DOT
<#PN_CHARS: (<PN_CHARS_U> | "-" | ["0"-"9"] | "\u00B7" |
["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] ) >
|
// No leading "_", no trailing ".", can have dot inside prefix name.
<#PN_PREFIX: <PN_CHARS_BASE> ((<PN_CHARS>|".")* <PN_CHARS>)? >
|
// With a leading "_", no dot at end of local name.
// A leading digit is accepted as well.
<#PN_LOCAL: (<PN_CHARS_U> | ["0"-"9"]) ((<PN_CHARS>|".")* <PN_CHARS>)? >
|
// NCNAME without "-" and ".", allowing leading digits.
<#VARNAME: ( <PN_CHARS_U> | ["0"-"9"] )
( <PN_CHARS_U> | ["0"-"9"] | "\u00B7" |
["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] )* >
}
// Catch-all tokens. Must be last.
// Any non-whitespace. Causes a parser exception, rather than a
// token manager error (with hidden line numbers).
// Only bad IRIs (e.g. spaces) now give unhelpful parse errors.
// Pattern: one or more characters that are not whitespace.
// NOTE(review): UNKNOWN is declared private (#). Private regular expressions
// are only building blocks for other tokens, so as written this entry does
// not itself produce a token - confirm whether the catch-all behaviour
// described in the comment above is still intended. Dropping the "#" would
// change tokenisation, because the longest match wins.
TOKEN:
{
<#UNKNOWN: (~[" ","\t","\n","\r","\f" ])+ >
}
/*
# Local Variables:
# tab-width: 4
# indent-tabs-mode: nil
# comment-default-style: "//"
# End:
*/