| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // Turtle & N3 for Jena |
| |
| // N3 compatible: |
| // All legal Turtle documents are accepted by this grammar. |
| // Some N3 features, which are still RDF, are provided. |
| |
| // JavaCC generator options for this grammar. |
| options |
| { |
| // Use \ u escapes in streams AND use a reader for the query |
| // => get both raw and escaped unicode |
| |
| // Java-style unicode escapes in the input are decoded by the generated |
| // character stream before tokenization. |
| JAVA_UNICODE_ESCAPE = true ; |
| UNICODE_INPUT = false ; |
| |
| // Generate instance (non-static) parser and token manager members. |
| STATIC = false ; |
| // DEBUG_PARSER = true ; |
| // DEBUG_TOKEN_MANAGER = true ; |
| } |
| |
| PARSER_BEGIN(TurtleParser) |
| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package com.hp.hpl.jena.n3.turtle.parser ; |
| |
| import com.hp.hpl.jena.n3.turtle.ParserBase ; |
| import com.hp.hpl.jena.graph.* ; |
| |
| // Parser class declaration. The body is intentionally empty: the parsing |
| // methods are generated by JavaCC from the productions that follow. |
| // Helpers called from the grammar actions (emitTriple, setPrefix, createNode, ...) |
| // are not defined in this file - presumably inherited from ParserBase; confirm there. |
| public class TurtleParser extends ParserBase |
| { |
| } |
| PARSER_END(TurtleParser) |
| |
| // --- Entry point |
| |
| // Top-level production: an optional byte-order mark, then any number of |
| // statements, up to end of input. |
| void parse() : { } |
| { |
| [ <BOM> ] |
| ( Statement() )* |
| <EOF> |
| } |
| |
| // One statement: a directive, or a subject with its properties, closed by ".". |
| void Statement() : { } |
| { |
| ( |
| Directive() |
| | |
| TriplesSameSubject() |
| ) |
| <DOT> |
| } |
| |
| // Prologue directives: bind a namespace prefix, or set the base IRI. |
| void Directive() : { Token tok ; String target ; } |
| { |
| // The @prefix keyword has the same shape as a LANGTAG. |
| <PREFIX> tok = <PNAME_NS> target = IRI_REF() |
| { |
| // Line/column passed on are those of the prefix-name token. |
| String prefix = fixupPrefix(tok.image, tok.beginLine, tok.beginColumn) ; |
| setPrefix(tok.beginLine, tok.beginColumn, prefix, target) ; |
| } |
| | |
| tok = <BASE> target = IRI_REF() |
| { |
| // Line/column passed on are those of the @base keyword. |
| setBase(target, tok.beginLine, tok.beginColumn) ; |
| } |
| } |
| |
| // -------- PATH |
| // Operator tokens for path-style syntax. No production visible in this grammar |
| // refers to them, but as ordinary (non-private) tokens they still take part |
| // in tokenization. |
| TOKEN: |
| { |
| < PLING: "!" > |
| | < VBAR: "|" > |
| | < CARROT: "^" > |
| | < FPATH: "->" > |
| | < RPATH: "<-" > |
| } |
| |
| // ---- TRIPLES |
| |
| // A subject followed by its predicate/object pairs. |
| void TriplesSameSubject() : { Node subject ; } |
| { |
| ( |
| // Plain node as subject: at least one property is required. |
| // Checking that the result is a valid triple is left to emitTriple. |
| subject = VarOrTerm() |
| PropertyListNotEmpty(subject) |
| | |
| // Subject written with triple-generating syntax: properties are optional. |
| subject = TriplesNode() |
| PropertyList(subject) |
| ) |
| } |
| |
| |
| // Optional predicate/object list for subject s. |
| void PropertyList(Node s) : { } |
| { |
| [ PropertyListNotEmpty(s) ] |
| } |
| |
| // Written without recursion, for the Turtle long PropertyList tests. |
| // Each ";" may be followed by another verb/object-list pair, or by nothing. |
| void PropertyListNotEmpty(Node s) : { Node verb ; } |
| { |
| verb = Verb() |
| ObjectList(s, verb) |
| ( |
| <SEMICOLON> |
| [ verb = Verb() ObjectList(s, verb) ] |
| )* |
| } |
| |
| // Written without recursion, for the Turtle long list tests. |
| // One or more comma-separated objects, all sharing subject s and predicate p. |
| // Each object is handled (and its triple emitted) by Object(s, p). |
| void ObjectList(Node s, Node p): { } |
| { |
| Object(s, p) |
| ( <COMMA> Object(s, p) )* |
| } |
| |
| // Parses one object node and emits the triple (s, p, object). |
| void Object(Node s, Node p): { Node obj ; } |
| { |
| obj = GraphNode() |
| { |
| // "token" is the last token consumed, i.e. the final token of the object. |
| emitTriple(token.beginLine, token.beginColumn, new Triple(s, p, obj)) ; |
| } |
| } |
| |
| // Predicate position: an IRI, the keyword "a", or the N3 shorthands "=" and "=>". |
| // Returns the node to use as the predicate. |
| Node Verb() : { Node verb ; String uri ; } |
| { |
| ( |
| uri = IRIref() |
| { verb = createNode(uri) ; } |
| | |
| <KW_A> |
| { verb = nRDFtype ; } |
| | |
| <EQ> |
| { |
| // N3 shorthand: rejected when strictTurtle is set. |
| if ( strictTurtle ) |
| throwParseException("= (owl:sameAs) not legal in Turtle", |
| token.beginLine, token.beginColumn ) ; |
| verb = nOwlSameAs ; |
| } |
| | |
| <ARROW> |
| { |
| // N3 shorthand: rejected when strictTurtle is set. |
| if ( strictTurtle ) |
| throwParseException("=> (log:implies) not legal in Turtle", |
| token.beginLine, token.beginColumn ) ; |
| verb = nLogImplies ; |
| } |
| ) |
| { return verb ; } |
| } |
| |
| // -------- Triple expansions |
| |
| // Anything that can stand in a node slot and which is |
| // a number of triples |
| |
| // Either a collection "( ... )" or a blank node property list "[ ... ]"; |
| // returns the node produced by whichever one matched. |
| Node TriplesNode() : { Node node ; } |
| { |
| ( node = Collection() | node = BlankNodePropertyList() ) |
| { return node ; } |
| } |
| |
| // "[ verb objects ; ... ]": a node from createBNode() is used as the subject |
| // of the enclosed properties and is returned. |
| Node BlankNodePropertyList() : { Node bnode ; } |
| { |
| <LBRACKET> |
| { bnode = createBNode() ; } |
| PropertyListNotEmpty(bnode) |
| <RBRACKET> |
| { return bnode ; } |
| } |
| |
| |
| // ------- RDF collections |
| |
| // Not coded the way SPARQL/ARQ does it, because of output ordering. |
| // "( item ... )": one cell node per item, linked with nRDFfirst / nRDFrest and |
| // terminated with nRDFnil. Returns the first cell. |
| Node Collection() : |
| { Node head = nRDFnil ; Node prev = null ; Node item ; } |
| { |
| <LPAREN> |
| ( |
| { |
| // New cell for this item; link the previous cell to it, if there is one. |
| Node cell = createBNode() ; |
| if ( prev != null ) |
| emitTriple(token.beginLine, token.beginColumn, |
| new Triple(prev, nRDFrest, cell)) ; |
| // The first cell created becomes the node returned for the whole list. |
| if ( head == nRDFnil ) |
| head = cell ; |
| } |
| item = GraphNode() |
| { |
| emitTriple(token.beginLine, token.beginColumn, |
| new Triple(cell, nRDFfirst, item)) ; |
| prev = cell ; |
| } |
| )+ |
| // One or more items: the empty list "()" is the NIL token, handled elsewhere. |
| <RPAREN> |
| { |
| // Close the list off. |
| if ( prev != null ) |
| emitTriple(token.beginLine, token.beginColumn, |
| new Triple(prev, nRDFrest, nRDFnil)) ; |
| return head ; |
| } |
| } |
| |
| // -------- Nodes in a graph pattern or template |
| |
| // Any node: a plain variable/term, or a triple-generating construct. |
| Node GraphNode() : { Node node ; } |
| { |
| ( node = VarOrTerm() | node = TriplesNode() ) |
| { return node ; } |
| } |
| |
| // A variable, a ground term, or a formula. |
| // Formula() always returns null in this grammar, so the result can be null. |
| Node VarOrTerm() : { Node node = null ; } |
| { |
| node = Var() { return node ; } |
| | |
| node = GraphTerm() { return node ; } |
| | |
| node = Formula() { return node ; } |
| } |
| |
| // Braced formula: one or more "."-separated triple groups (a "." need not be |
| // followed by anything), bracketed by the startFormula / endFormula callbacks. |
| // This production always returns null. |
| Node Formula() : { Token open ; Token close ; } |
| { |
| open = <LBRACE> |
| { startFormula(open.beginLine, open.beginColumn) ; } |
| TriplesSameSubject() |
| ( <DOT> [ TriplesSameSubject() ] )* |
| close = <RBRACE> |
| { endFormula(close.beginLine, close.beginColumn) ; return null ; } |
| } |
| |
| // A VAR token, turned into a node by createVariable. |
| Node Var() : { Token tok ; } |
| { |
| tok = <VAR> |
| { |
| Node variable = createVariable(tok.image, tok.beginLine, tok.beginColumn) ; |
| return variable ; |
| } |
| } |
| |
| // A single term: IRI, literal (string, number, boolean), blank node, or the empty list. |
| Node GraphTerm() : { Node term ; String uri ; } |
| { |
| ( |
| uri = IRIref() { term = createNode(uri) ; } |
| | |
| term = RDFLiteral() |
| | |
| // Signs are part of the numeric tokens themselves. |
| term = NumericLiteral() |
| | |
| term = BooleanLiteral() |
| | |
| term = BlankNode() |
| | |
| // "()" is lexed as the single token NIL rather than as LPAREN RPAREN. |
| <NIL> { term = nRDFnil ; } |
| ) |
| { return term ; } |
| } |
| // ---- Basic terms |
| |
| // An INTEGER, DECIMAL or DOUBLE token; its text is handed to the matching |
| // createLiteral* helper and the resulting node returned. |
| Node NumericLiteral() : { Token tok ; Node literal ; } |
| { |
| ( |
| tok = <INTEGER> { literal = createLiteralInteger(tok.image) ; } |
| | |
| tok = <DECIMAL> { literal = createLiteralDecimal(tok.image) ; } |
| | |
| tok = <DOUBLE> { literal = createLiteralDouble(tok.image) ; } |
| ) |
| { return literal ; } |
| } |
| |
| // A string followed by an optional language tag or an optional "^^" datatype |
| // IRI (one or the other, not both). |
| // Returns the node built by createLiteral(lex, lang, dt); lang and dt are |
| // null when the corresponding part is absent. |
| Node RDFLiteral() : { String lex ; String lang = null ; String dt = null ; } |
| { |
| lex = String() |
| ( |
| lang = Langtag() |
| | |
| ( <DATATYPE> dt = IRIref() ) |
| )? |
| { return createLiteral(lex, lang, dt) ; } |
| } |
| |
| // A language tag; the token text is passed through stripChars(image, 1) |
| // (presumably dropping the leading "@" - confirm in ParserBase). |
| String Langtag() : { Token tok ; } |
| { |
| ( |
| tok = <LANGTAG> |
| | |
| // The directive keywords look like language tags, so accept them here too. |
| tok = AnyDirective() |
| ) |
| { return stripChars(tok.image, 1) ; } |
| } |
| |
| // Matches either directive keyword and hands back the token itself. |
| Token AnyDirective() : { Token tok ; } |
| { |
| tok = <PREFIX> { return tok ; } |
| | |
| tok = <BASE> { return tok ; } |
| } |
| |
| // TRUE / FALSE keyword mapped to XSD_TRUE / XSD_FALSE. |
| Node BooleanLiteral() : { Node bool ; } |
| { |
| ( |
| <TRUE> { bool = XSD_TRUE ; } |
| | |
| <FALSE> { bool = XSD_FALSE ; } |
| ) |
| { return bool ; } |
| } |
| |
| // Any of the four quoted string forms. The quotes are removed first, then |
| // the result is passed through unescapeStr. |
| String String() : { Token tok ; String body ; } |
| { |
| ( |
| // Short forms go through stripQuotes. |
| tok = <STRING_LITERAL1> { body = stripQuotes(tok.image) ; } |
| | |
| tok = <STRING_LITERAL2> { body = stripQuotes(tok.image) ; } |
| | |
| // Long (triple-quoted) forms go through stripQuotes3. |
| tok = <STRING_LITERAL_LONG1> { body = stripQuotes3(tok.image) ; } |
| | |
| tok = <STRING_LITERAL_LONG2> { body = stripQuotes3(tok.image) ; } |
| ) |
| { return unescapeStr(body, tok.beginLine, tok.beginColumn) ; } |
| } |
| |
| // An IRI written in angle brackets or as a prefixed name; returns the string |
| // produced by IRI_REF() or PrefixedName() respectively. |
| String IRIref() : { String uri ; } |
| { |
| ( uri = IRI_REF() | uri = PrefixedName() ) |
| { return uri ; } |
| } |
| |
| // A PNAME_LN or PNAME_NS token, passed to resolvePName with its position. |
| String PrefixedName() : { Token tok ; } |
| { |
| ( tok = <PNAME_LN> | tok = <PNAME_NS> ) |
| { return resolvePName(tok.image, tok.beginLine, tok.beginColumn) ; } |
| } |
| |
| // A labelled blank node, or a createBNode() node for the ANON token. |
| Node BlankNode() : { Token label = null ; Node bnode ; } |
| { |
| ( |
| label = <BLANK_NODE_LABEL> |
| { bnode = createBNode(label.image, label.beginLine, label.beginColumn) ; } |
| | |
| // "[]" is lexed as the single token ANON rather than as LBRACKET RBRACKET. |
| <ANON> |
| { bnode = createBNode() ; } |
| ) |
| { return bnode ; } |
| } |
| |
| // An angle-bracketed IRI token, passed to resolveQuotedIRI with its position. |
| String IRI_REF() : { Token tok ; } |
| { |
| tok = <IRIref> |
| { |
| String resolved = resolveQuotedIRI(tok.image, tok.beginLine, tok.beginColumn) ; |
| return resolved ; |
| } |
| } |
| |
| // ------------------------------------------ |
| // Tokens |
| |
| // Comments and whitespace |
| |
| // Whitespace between tokens is matched and discarded. |
| SKIP : { " " | "\t" | "\n" | "\r" | "\f" } |
| |
| // Private (#) regular expression for the same characters, usable only inside |
| // other token definitions (NIL and ANON refer to it). |
| TOKEN: { <#WS: " " | "\t" | "\n" | "\r" | "\f"> } |
| |
| // A comment runs from "#" to the end of the line; the line ending is optional |
| // so a comment on the last line of input still matches. As a special token it |
| // is not handed to the parser as a regular token. |
| SPECIAL_TOKEN : |
| { <SINGLE_LINE_COMMENT: "#" (~["\n","\r"])* ("\n"|"\r"|"\r\n")? > } |
| |
| // ------------------------------------------------- |
| // Keywords : directives before LANGTAG |
| // PREFIX and BASE are declared ahead of LANGTAG so that, when both match the |
| // same text, the earlier definition is the one chosen. |
| |
| TOKEN : |
| { |
| // Keyword "a", used in verb position (see Verb()). |
| <KW_A: "a" > |
| // Prologue |
| | < PREFIX: "@prefix" > |
| | < BASE: "@base" > |
| } |
| |
| |
| // Literal tokens: booleans, numbers and quoted strings. |
| // IGNORE_CASE applies to every definition in this block. |
| // NOTE(review): that includes ECHAR, so escape letters match in either case - confirm intended. |
| TOKEN [IGNORE_CASE] : |
| { |
| < TRUE: "true" > |
| | < FALSE: "false" > |
| |
| // ------------------------------------------------- |
| |
| // Numbers: an optional sign is part of the token. |
| | < INTEGER: (["-","+"])? <DIGITS> > |
| | |
| < DECIMAL: (["-","+"])? |
| ((<DIGITS>)+ "." (<DIGITS>)* | "." (<DIGITS>)+) |
| > |
| // Required exponent. |
| | < DOUBLE: |
| (["+","-"])? |
| ( |
| (["0"-"9"])+ "." (["0"-"9"])* <EXPONENT> |
| | "." (["0"-"9"])+ (<EXPONENT>) |
| | (["0"-"9"])+ <EXPONENT> |
| ) |
| > |
| | < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ > |
| | < #QUOTE_3D: "\"\"\""> |
| | < #QUOTE_3S: "'''"> |
| // "u" done by javacc input stream. |
| // "U" escapes not supported yet for Java strings |
| | <ECHAR: "\\" ("t"|"b"|"n"|"r"|"f"|"\\"|"\""|"'")> |
| |
| // Short strings: no raw newline, carriage return or backslash inside. |
| | < STRING_LITERAL1: |
| // Single quoted string |
| "'" ( (~["'","\\","\n","\r"]) | <ECHAR> )* "'" > |
| |
| | < STRING_LITERAL2: |
| // Double quoted string |
| "\"" ( (~["\"","\\","\n","\r"]) | <ECHAR> )* "\"" > |
| |
| // Long strings: one or two consecutive quote characters may appear inside |
| // when followed by a non-quote character. |
| | < STRING_LITERAL_LONG1: |
| <QUOTE_3S> |
| ( ~["'","\\"] | <ECHAR> | ("'" ~["'"]) | ("''" ~["'"]))* |
| <QUOTE_3S> > |
| |
| | < STRING_LITERAL_LONG2: |
| <QUOTE_3D> |
| ( ~["\"","\\"] | <ECHAR> | ("\"" ~["\""]) | ("\"\"" ~["\""]))* |
| <QUOTE_3D> > |
| // Referenced by INTEGER and DECIMAL above. |
| | < DIGITS: (["0"-"9"])+> |
| // | <HEX: ["0"-"9"] | ["A"-"F"] | ["a"-"f"]> |
| } |
| |
| // IRIs, prefixed names, blank node labels, variables and language tags. |
| TOKEN: |
| { |
| // Includes # for relative URIs |
| // Anything between "<" and ">" except the listed characters and control/space characters. |
| <IRIref: "<" (~[ ">","<", "\"", "{", "}", "^", "\\", "|", "`", |
| "\u0000"-"\u0020"])* ">" > |
| // Prefix part, including the colon; the prefix itself may be empty. |
| | <PNAME_NS: (<PN_PREFIX>)? ":" > |
| | <PNAME_LN: <PNAME_NS> <PN_LOCAL> > |
| | <BLANK_NODE_LABEL: "_:" <PN_LOCAL> > |
| | <VAR: "?" <VARNAME> > |
| // Letters first, then "-"-separated alphanumeric subtags. |
| | <LANGTAG: <AT> (<A2Z>)+("-" (<A2ZN>)+)* > |
| | <#A2Z: ["a"-"z","A"-"Z"]> |
| | <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]> |
| } |
| |
| |
| // Brackets and separators. |
| TOKEN : |
| { |
| < LPAREN: "(" > |
| | < RPAREN: ")" > |
| |
| // Empty list: parentheses with only whitespace and/or comments between them. |
| | <NIL: <LPAREN> (<WS>|<SINGLE_LINE_COMMENT>)* <RPAREN> > |
| |
| | < LBRACE: "{" > |
| | < RBRACE: "}" > |
| |
| | < LBRACKET: "[" > |
| | < RBRACKET: "]" > |
| // Anonymous blank node: brackets with only whitespace and/or comments between them. |
| | < ANON: <LBRACKET> (<WS>|<SINGLE_LINE_COMMENT>)* <RBRACKET> > |
| |
| | < SEMICOLON: ";" > |
| | < COMMA: "," > |
| | < DOT: "." > |
| } |
| |
| // Operator |
| // Of these, the productions in this grammar use EQ, ARROW, DATATYPE and BOM; |
| // AT is used by the LANGTAG definition. The others are not referenced by any |
| // production visible here. |
| |
| TOKEN : |
| { |
| < EQ: "=" > |
| | <ARROW: "=>"> |
| |
| | < DOLLAR: "$"> |
| | < QMARK: "?"> |
| |
| | < TILDE: "~" > |
| | < COLON: ":" > |
| |
| // | < PLUS: "+" > |
| // | < MINUS: "-" > |
| | < STAR: "*" > |
| | < SLASH: "/" > |
| | < RSLASH: "\\" > |
| // Byte-order mark (U+FEFF), accepted optionally at the start by parse(). |
| | < BOM: "\uFEFF"> |
| |
| //| < AMP: "&" > |
| //| < REM: "%" > |
| |
| | < DATATYPE: "^^"> |
| | < AT: "@"> |
| } |
| |
| // Character classes for names. All definitions here are private (#): they are |
| // building blocks for other token definitions and never tokens themselves. |
| TOKEN: |
| { |
| <#PN_CHARS_BASE: |
| ["A"-"Z"] | ["a"-"z"] | |
| ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] | |
| ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] | |
| ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] | |
| ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFD"] |
| > |
| // [#x10000-#xEFFFF] |
| // (the range named in the line above is not part of the definition) |
| | |
| <#PN_CHARS_U: <PN_CHARS_BASE> | "_" > |
| | |
| // No DOT |
| <#PN_CHARS: (<PN_CHARS_U> | "-" | ["0"-"9"] | "\u00B7" | |
| ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] ) > |
| | |
| // No leading "_", no trailing ".", can have dot inside prefix name. |
| <#PN_PREFIX: <PN_CHARS_BASE> ((<PN_CHARS>|".")* <PN_CHARS>)? > |
| | |
| // With a leading "_", no dot at end of local name. |
| <#PN_LOCAL: (<PN_CHARS_U> | ["0"-"9"]) ((<PN_CHARS>|".")* <PN_CHARS>)? > |
| | |
| // NCNAME without "-" and ".", allowing leading digits. |
| <#VARNAME: ( <PN_CHARS_U> | ["0"-"9"] ) |
| ( <PN_CHARS_U> | ["0"-"9"] | "\u00B7" | |
| ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] )* > |
| } |
| |
| // Catch-all tokens. Must be last. |
| // Any non-whitespace. Causes a parser exception, rather than a |
| // token manager error (with hidden line numbers). |
| // Only bad IRIs (e.g. spaces) now give unhelpful parse errors. |
| // NOTE(review): the "#" prefix makes UNKNOWN a private regular expression, and |
| // nothing in this grammar refers to it, so as written it is never matched as a |
| // token. Confirm whether the catch-all behaviour described above is still wanted |
| // before changing it: as an ordinary token it would compete on length with |
| // every other token. |
| TOKEN: |
| { |
| <#UNKNOWN: (~[" ","\t","\n","\r","\f" ])+ > |
| } |
| |
| /* |
| # Local Variables: |
| # tab-width: 4 |
| # indent-tabs-mode: nil |
| # comment-default-style: "//" |
| # End: |
| */ |