| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| // Turtle RDF 1.1 Grammar. |
| // This is not the usual parser used by RIOT. |
| // RIOT has its own tokenizer/parser which is faster. |
| // This grammar exists as a more convenient base for new languages. |
| |
| options |
| { |
| // Alternative, disabled: let the input stream expand \ u escapes before tokenizing. |
| // // \ u processed in the input stream |
| // JAVA_UNICODE_ESCAPE = true ; |
| // UNICODE_INPUT = false ; |
| |
| // Settings in use: the input stream does not touch \ u escapes. |
| // And Fix ECHAR |
| // \ u processed after parsing. |
| // strings, prefix names, IRIs |
| JAVA_UNICODE_ESCAPE = false ; |
| UNICODE_INPUT = true ; |
| |
| // Generate an instance-based (non-static) parser. |
| STATIC = false ; |
| // Debug switches, normally left off. |
| // DEBUG_PARSER = true ; |
| // DEBUG_TOKEN_MANAGER = true ; |
| } |
| |
| PARSER_BEGIN(TurtleJavacc) |
| /** |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.jena.riot.lang.extra.javacc; |
| |
| import org.apache.jena.graph.*; |
| import org.apache.jena.riot.lang.extra.* ; |
| |
| // Parser class that JavaCC generates from this grammar. The body is empty here: |
| // the productions below become its methods. The helper calls used in the grammar |
| // actions (createNode, emitTriple, setPrefix, ...) are not defined in this file; |
| // presumably they are inherited from TurtleParserBase - confirm against that class. |
| public class TurtleJavacc extends TurtleParserBase |
| {} |
| PARSER_END(TurtleJavacc) |
| |
| // Entry point: an optional byte order mark, any number of statements, |
| // then end of input. |
| void parse() : {} |
| { |
| ByteOrderMark() ( Statement() )* <EOF> |
| } |
| |
| // Consumes a leading <BOM> token when one is present; matches nothing otherwise. |
| void ByteOrderMark() : {} |
| { |
| [ <BOM> ] |
| } |
| |
| // One top-level statement: a directive in either spelling, or a block of |
| // triples closed by ".". |
| void Statement() : {} |
| { |
| ( Directive() ) |
| | |
| ( DirectiveOld() ) |
| | |
| // Strict form: the closing "." is required. A lenient variant would be |
| // ( TriplesSameSubject() ( <DOT> | <EOF> ) ) |
| ( TriplesSameSubject() <DOT> ) |
| } |
| |
| // Directives written without "@" and without a closing ".": |
| // <PREFIX> <PNAME_NS> IRIREF registers a prefix, <BASE> IRIREF sets the base. |
| // Line/column passed to the helpers come from the prefix-name token |
| // (prefix form) or from the keyword token (base form). |
| void Directive() : { Token tok ; String target ; } |
| { |
| <PREFIX> tok = <PNAME_NS> target = IRIREF() |
| { |
| setPrefix(tok.beginLine, tok.beginColumn, |
| fixupPrefix(tok.image, tok.beginLine, tok.beginColumn), |
| target) ; |
| } |
| | |
| tok = <BASE> target = IRIREF() |
| { setBase(target, tok.beginLine, tok.beginColumn) ; } |
| } |
| |
| // "@"-style directives, each closed by ".": |
| // <PREFIX_OLD> <PNAME_NS> IRIREF <DOT> registers a prefix, |
| // <BASE_OLD> IRIREF <DOT> sets the base. |
| // The helper calls run only after the closing <DOT> has been consumed. |
| void DirectiveOld() : { Token tok ; String target ; } |
| { |
| <PREFIX_OLD> tok = <PNAME_NS> target = IRIREF() <DOT> |
| { |
| setPrefix(tok.beginLine, tok.beginColumn, |
| fixupPrefix(tok.image, tok.beginLine, tok.beginColumn), |
| target) ; |
| } |
| | |
| tok = <BASE_OLD> target = IRIREF() <DOT> |
| { setBase(target, tok.beginLine, tok.beginColumn) ; } |
| } |
| |
| // Directive keywords that the tokenizer reports where a language tag may be meant. |
| // "@prefix" and "@base" also fit the LANGTAG pattern, but PREFIX_OLD and BASE_OLD |
| // are declared earlier in the token list, so they win the equal-length match. |
| // Returning them here lets LangTag() accept literals such as "x"@base; its |
| // one-character strip then removes the "@" as it does for <LANGTAG>. |
| // The keywords <PREFIX> and <BASE> carry no "@" and can never be language tags, |
| // so they are not listed (listing them made "x" base parse as language "ase"). |
| Token AnyDirective() : { Token t ; } |
| { |
| ( t = <PREFIX_OLD> | t = <BASE_OLD> ) { return t ; } |
| } |
| |
| // Triples sharing one subject. A plain subject (IRI or blank node) must be |
| // followed by at least one predicate; a TriplesNode subject (collection or |
| // "[ ... ]") may stand alone, so its property list is optional. |
| void TriplesSameSubject() : { Node subj ; } |
| { |
| ( subj = SubjectNode() PropertyListNotEmpty(subj) ) |
| | |
| ( subj = TriplesNode() PropertyList(subj) ) |
| } |
| |
| // Optional predicate/object list for subject s; may match nothing. |
| void PropertyList(Node s) : {} |
| { |
| [ PropertyListNotEmpty(s) ] |
| } |
| |
| // One or more "verb object-list" groups for subject s, separated by ";". |
| // A ";" need not be followed by another group, so trailing and repeated |
| // semicolons are accepted. |
| void PropertyListNotEmpty(Node s) : { Node verb = null ; } |
| { |
| verb = Verb() ObjectList(s, verb) |
| ( |
| <SEMICOLON> |
| [ verb = Verb() ObjectList(s, verb) ] |
| )* |
| } |
| |
| // One or more objects for the pair (s, p), separated by ",". |
| // Each object is handled, and its triple emitted, by Object(). |
| void ObjectList(Node s, Node p): { } |
| { |
| Object(s, p) |
| ( <COMMA> Object(s, p) )* |
| } |
| |
| // Parses one object and emits the triple (s, p, object). |
| // The position reported is that of "token", the parser's most recently |
| // consumed token, i.e. the last token of the object. |
| void Object(Node s, Node p): { Node obj ; } |
| { |
| obj = GraphNode() |
| { |
| int line = token.beginLine ; |
| int column = token.beginColumn ; |
| emitTriple(line, column, s, p, obj) ; |
| } |
| } |
| |
| // Predicate position: an IRI turned into a node, or the keyword "a", |
| // which yields nRDFtype. |
| Node Verb() : { String iriStr ; } |
| { |
| iriStr = iri() { return createNode(iriStr) ; } |
| | |
| <KW_A> { return nRDFtype ; } |
| } |
| |
| // -------- Triple expansions |
| |
| // Anything that can stand in a node slot and which |
| // itself expands to a number of triples |
| |
| // A node that generates triples of its own: a collection "( ... )" or a |
| // blank node property list "[ ... ]". Returns the node standing for it. |
| Node TriplesNode() : { Node node ; } |
| { |
| ( |
| node = Collection() |
| | |
| node = BlankNodePropertyList() |
| // Disabled alternative: |
| // | node = TripleStar() |
| ) |
| { return node ; } |
| } |
| |
| // "[ predicate-object list ]": creates a blank node positioned at the "[", |
| // uses it as the subject of the enclosed non-empty property list, and |
| // returns it. |
| Node BlankNodePropertyList() : { Token open ; Node bnode ; } |
| { |
| open = <LBRACKET> |
| { bnode = createBNode(open.beginLine, open.beginColumn) ; } |
| PropertyListNotEmpty(bnode) |
| <RBRACKET> |
| { return bnode ; } |
| } |
| |
| // The syntax for RDF* |
| //Node TripleStar() : |
| // { Node s , p , o ; Token t ; } |
| // { |
| // t = "<<" |
| // { int beginLine = t.beginLine; int beginColumn = t.beginColumn; t = null; } |
| // id = createTripleId(beginLine, beginColumn() ; |
| // } |
| // s = GraphNode() |
| // p = GraphNode() |
| // o = GraphNode() |
| // ">>" |
| // { Node n = tripleStar(s, p, o); |
| // return n; |
| // } |
| // } |
| // |
| |
| // ------- RDF collections |
| // Ordering? |
| |
| // Collection "( item item ... )" with at least one item. |
| // For every item a list cell is created with createListNode; the item is |
| // attached to its cell with nRDFfirst, each cell is linked to the next with |
| // nRDFrest, and the last cell is linked to nRDFnil. All list triples are |
| // reported at the position of the opening "(". Returns the first cell. |
| // The empty list never reaches this production: "(" ")" is the single |
| // token <NIL>, handled in GraphTerm(). |
| Node Collection() : |
| { Node listHead = nRDFnil ; Node lastCell = null ; Node n ; Token t ; } |
| { |
| t = <LPAREN> |
| { int line = t.beginLine; int column = t.beginColumn; |
| listStart(line, column); |
| } |
| ( |
| { Node cell = createListNode(line, column) ; |
| // First cell becomes the head; later cells are chained to the previous one. |
| if ( listHead == nRDFnil ) |
| listHead = cell ; |
| if ( lastCell != null ) |
| listTriple(line, column, lastCell, nRDFrest, cell) ; |
| } |
| n = GraphNode() |
| { |
| listTriple(line, column, cell, nRDFfirst, n) ; |
| lastCell = cell ; |
| } |
| ) + |
| // Not * here - "()" is handled separately. |
| <RPAREN> |
| { // Terminate the list. lastCell is always set after the "+" loop; |
| // the guard is kept as a safety net. |
| if ( lastCell != null ) |
| listTriple(line, column, lastCell, nRDFrest, nRDFnil) ; |
| listFinish(line, column); |
| return listHead ; } |
| } |
| |
| // Subject that is a single term: a blank node, or an IRI turned into a node. |
| Node SubjectNode() : { Node subj ; String iriStr ; } |
| { |
| ( |
| subj = BlankNode() |
| | |
| iriStr = iri() { subj = createNode(iriStr) ; } |
| ) |
| { return subj ; } |
| } |
| |
| // Anything allowed in an object or list-item slot: a single term, or a |
| // node that expands to triples (collection / "[ ... ]"). |
| Node GraphNode() : { Node node ; } |
| { |
| ( node = GraphTerm() | node = TriplesNode() ) |
| { return node ; } |
| } |
| |
| // A single term: IRI, RDF literal, numeric literal, boolean literal, |
| // blank node, or the empty list token <NIL> (which yields nRDFnil). |
| Node GraphTerm() : { Node term ; String iriStr ; } |
| { |
| ( |
| iriStr = iri() { term = createNode(iriStr) ; } |
| | term = RDFLiteral() |
| | term = NumericLiteral() |
| | term = BooleanLiteral() |
| | term = BlankNode() |
| // "(" ")" arrives as one token, not as <LPAREN> <RPAREN>. |
| | <NIL> { term = nRDFnil ; } |
| ) |
| { return term ; } |
| } |
| |
| // A quoted string, optionally followed by either a language tag or |
| // "^^" and a datatype IRI (never both). Whichever part is absent is |
| // passed to createLiteral as null. |
| Node RDFLiteral() : { String lex ; String lang = null ; String uri = null ; } |
| { |
| lex = String() |
| // Optional lang tag and datatype. |
| ( |
| lang = LangTag() |
| | |
| <DATATYPE> uri = iri() |
| )? |
| { return createLiteral(lex, lang, uri) ; } |
| } |
| |
| // Language tag of a literal. Accepts <LANGTAG> and, because directive |
| // keywords can look like language tags, whatever token AnyDirective() |
| // hands back. The first character of the token image is stripped. |
| String LangTag() : { Token tag ; } |
| { |
| ( tag = <LANGTAG> | tag = AnyDirective() ) |
| { return stripChars(tag.image, 1) ; } |
| } |
| |
| // A numeric literal: unsigned, "+"-prefixed or "-"-prefixed. |
| Node NumericLiteral() : { Node num ; } |
| { |
| num = NumericLiteralUnsigned() { return num ; } |
| | |
| num = NumericLiteralPositive() { return num ; } |
| | |
| num = NumericLiteralNegative() { return num ; } |
| } |
| |
| // Number without a sign; the token kind selects the integer, decimal |
| // or double literal constructor, which receives the raw token image. |
| Node NumericLiteralUnsigned() : { Token tok ; Node num ; } |
| { |
| ( |
| tok = <INTEGER> { num = createLiteralInteger(tok.image) ; } |
| | |
| tok = <DECIMAL> { num = createLiteralDecimal(tok.image) ; } |
| | |
| tok = <DOUBLE> { num = createLiteralDouble(tok.image) ; } |
| ) |
| { return num ; } |
| } |
| |
| // Number with a leading "+"; the sign is part of the token image that is |
| // passed to the integer, decimal or double literal constructor. |
| Node NumericLiteralPositive() : { Token tok ; Node num ; } |
| { |
| ( |
| tok = <INTEGER_POSITIVE> { num = createLiteralInteger(tok.image) ; } |
| | |
| tok = <DECIMAL_POSITIVE> { num = createLiteralDecimal(tok.image) ; } |
| | |
| tok = <DOUBLE_POSITIVE> { num = createLiteralDouble(tok.image) ; } |
| ) |
| { return num ; } |
| } |
| |
| // Number with a leading "-"; the sign is part of the token image that is |
| // passed to the integer, decimal or double literal constructor. |
| Node NumericLiteralNegative() : { Token tok ; Node num ; } |
| { |
| ( |
| tok = <INTEGER_NEGATIVE> { num = createLiteralInteger(tok.image) ; } |
| | |
| tok = <DECIMAL_NEGATIVE> { num = createLiteralDecimal(tok.image) ; } |
| | |
| tok = <DOUBLE_NEGATIVE> { num = createLiteralDouble(tok.image) ; } |
| ) |
| { return num ; } |
| } |
| |
| |
| // Keyword <TRUE> or <FALSE>, mapped to XSD_TRUE / XSD_FALSE. |
| Node BooleanLiteral() : { Node value ; } |
| { |
| ( |
| <TRUE> { value = XSD_TRUE ; } |
| | |
| <FALSE> { value = XSD_FALSE ; } |
| ) |
| { return value ; } |
| } |
| |
| // Lexical form of a quoted string. Short forms lose one quote character |
| // per side (stripQuotes), long forms lose three (stripQuotes3); escapes |
| // are then expanded by unescapeStr, reporting the token's position. |
| String String() : { Token tok ; String body ; } |
| { |
| ( |
| ( tok = <STRING_LITERAL1> | tok = <STRING_LITERAL2> ) |
| { body = stripQuotes(tok.image) ; } |
| | |
| ( tok = <STRING_LITERAL_LONG1> | tok = <STRING_LITERAL_LONG2> ) |
| { body = stripQuotes3(tok.image) ; } |
| ) |
| { return unescapeStr(body, tok.beginLine, tok.beginColumn) ; } |
| } |
| |
| // An IRI as a string, written either as <...> or as a prefixed name. |
| String iri() : { String value ; } |
| { |
| ( value = IRIREF() | value = PrefixedName() ) |
| { return value ; } |
| } |
| |
| // Prefixed name, with a local part (<PNAME_LN>) or without (<PNAME_NS>); |
| // both are expanded by resolvePName using the token image and position. |
| String PrefixedName() : { Token tok ; } |
| { |
| ( tok = <PNAME_LN> | tok = <PNAME_NS> ) |
| { return resolvePName(tok.image, tok.beginLine, tok.beginColumn) ; } |
| } |
| |
| // Blank node: labelled ("_:x", created from the token image) or anonymous. |
| // "[" "]" arrives as the single token <ANON>, not as <LBRACKET> <RBRACKET>. |
| Node BlankNode() : { Token tok ; Node bnode ; } |
| { |
| ( |
| tok = <BLANK_NODE_LABEL> |
| { bnode = createBNode(tok.image, tok.beginLine, tok.beginColumn) ; } |
| | |
| tok = <ANON> |
| { bnode = createBNode(tok.beginLine, tok.beginColumn) ; } |
| ) |
| { return bnode ; } |
| } |
| |
| // An IRI written as <...>; the whole token image, brackets included, |
| // is handed to resolveQuotedIRI together with the token position. |
| String IRIREF() : { Token tok ; } |
| { |
| tok = <IRIref> |
| { |
| String resolved = resolveQuotedIRI(tok.image, tok.beginLine, tok.beginColumn) ; |
| return resolved ; |
| } |
| } |
| |
| // ------------------------------------------ |
| // Tokens |
| |
| // Whitespace between tokens is discarded: space, tab, newline, carriage return, form feed. |
| SKIP : { " " | "\t" | "\n" | "\r" | "\f" } |
| |
| // Private ("#") helper naming the same whitespace characters, so that other |
| // token definitions (WSC) can refer to them. It is not a token on its own. |
| TOKEN: { <#WS: " " | "\t" | "\n" | "\r" | "\f"> } |
| |
| // Comment: "#" up to the end of the line, with the line ending optional so a |
| // comment on the last line of input is accepted. Declared as a special token, |
| // so it is not part of the token sequence the productions match. |
| SPECIAL_TOKEN : |
| { <SINGLE_LINE_COMMENT: "#" (~["\n","\r"])* ("\n"|"\r"|"\r\n")? > } |
| |
| TOKEN : // Case sensitive tokens. |
| { |
| // Keyword "a"; Verb() maps it to nRDFtype. |
| <KW_A: "a" > |
| // "@"-style directive keywords, used by DirectiveOld(). |
| | < PREFIX_OLD: "@prefix" > |
| | < BASE_OLD: "@base" > |
| } |
| |
| // Keywords matched without regard to letter case. |
| TOKEN [IGNORE_CASE] : |
| { |
| // Directive keywords without "@", used by Directive(). |
| < BASE: "base" > |
| | < PREFIX: "prefix" > |
| |
| // Boolean literals, used by BooleanLiteral(). |
| | < TRUE: "true" > |
| | < FALSE: "false" > |
| } |
| |
| // Main token table. Names starting with "#" are private helpers that only |
| // exist to build other patterns. The order of the entries matters when two |
| // patterns match the same text at the same length. |
| TOKEN : |
| { |
| // Whitespace or a comment; allowed inside NIL and ANON. |
| <#WSC: <WS> | <SINGLE_LINE_COMMENT> > |
| // Byte order mark, accepted by ByteOrderMark() at the start of input. |
| | < BOM: "\uFEFF"> |
| | < PLUS: "+" > |
| | < MINUS: "-" > |
| |
| // Unsigned numbers. |
| | < #DIGITS: (["0"-"9"])+> |
| | < INTEGER: <DIGITS> > |
| | < DECIMAL: (<DIGITS>)? "." <DIGITS> > |
| | < DOUBLE: // Required exponent. |
| ( |
| (["0"-"9"])+ "." (["0"-"9"])* <EXPONENT> |
| | "." (["0"-"9"])+ (<EXPONENT>) |
| | (["0"-"9"])+ <EXPONENT> |
| ) |
| > |
| |
| // Signed numbers: the sign is part of the token image. |
| | < INTEGER_POSITIVE: <PLUS> <INTEGER> > |
| | < DECIMAL_POSITIVE: <PLUS> <DECIMAL> > |
| | < DOUBLE_POSITIVE: <PLUS> <DOUBLE> > |
| |
| | < INTEGER_NEGATIVE: <MINUS> <INTEGER> > |
| | < DECIMAL_NEGATIVE: <MINUS> <DECIMAL> > |
| | < DOUBLE_NEGATIVE: <MINUS> <DOUBLE> > |
| |
| | < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ > |
| | < #QUOTE_3D: "\"\"\""> |
| | < #QUOTE_3S: "'''"> |
| // String escapes. Besides the single-character escapes this accepts UCHAR, |
| // which (see its definition further down) is only the backslash plus "u" or "U"; |
| // the digits after it are matched as ordinary string characters. |
| // NOTE(review): ECHAR is not marked private, unlike the other helpers - confirm intent. |
| | < ECHAR: <UCHAR> | "\\" ( "t"|"b"|"n"|"r"|"f"|"\\"|"\""|"'") > |
| | < STRING_LITERAL1: |
| // Single quoted string; no raw newline or carriage return inside. |
| "'" ( (~["'","\\","\n","\r"]) | <ECHAR> )* "'" > |
| | < STRING_LITERAL2: |
| // Double quoted string; no raw newline or carriage return inside. |
| "\"" ( (~["\"","\\","\n","\r"]) | <ECHAR> )* "\"" > |
| // Long strings: up to two quote characters in a row may appear inside, |
| // provided they are followed by a non-quote character or an escape. |
| | < STRING_LITERAL_LONG1: |
| <QUOTE_3S> |
| ( ("'" | "''")? (~["'","\\"] | <ECHAR> ))* |
| <QUOTE_3S> > |
| |
| | < STRING_LITERAL_LONG2: |
| <QUOTE_3D> |
| ( ("\"" | "\"\"")? (~["\"","\\"] | <ECHAR> ))* |
| <QUOTE_3D> > |
| |
| | < LPAREN: "(" > |
| | < RPAREN: ")" > |
| |
| // All the stuff for NIL is needed just to make a |
| // single list "() ." as a triple pattern illegal. |
| // It leads to a lot of extra work. |
| // Similarly [] (the ANON token below). |
| |
| | <NIL: <LPAREN> (<WSC>)* <RPAREN> > |
| |
| // Braces are tokenized but not used by any production visible in this grammar. |
| | < LBRACE: "{" > |
| | < RBRACE: "}" > |
| |
| | < LBRACKET: "[" > |
| | < RBRACKET: "]" > |
| | < ANON: <LBRACKET> (<WSC>)* <RBRACKET> > |
| |
| | < SEMICOLON: ";" > |
| | < COMMA: "," > |
| | < DOT: "." > |
| |
| | < DATATYPE: "^^"> |
| | < AT: "@"> |
| |
| // UCHAR is just the escape introducer: a backslash followed by "u" or "U". |
| // The IRIref character class does not exclude "#", so it |
| // includes # for relative URIs |
| | <#UCHAR: "\\" ( "u" | "U" ) > |
| | <IRIref: "<" |
| (~[ ">","<", "\"", "{", "}", "^", "\\", "|", "`","\u0000"-"\u0020"] | <UCHAR>)* |
| ">" > |
| | <PNAME_NS: (<PN_PREFIX>)? ":" > |
| | <PNAME_LN: <PNAME_NS> <PN_LOCAL> > |
| | <BLANK_NODE_LABEL: "_:" (<PN_CHARS_U> | ["0"-"9"]) ((<PN_CHARS>|".")* <PN_CHARS>)? > |
| | <LANGTAG: <AT> (<A2Z>)+("-" (<A2ZN>)+)* > |
| | <#A2Z: ["a"-"z","A"-"Z"]> |
| | <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]> |
| |
| | <#PN_CHARS_BASE: |
| ["A"-"Z"] | ["a"-"z"] | |
| ["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] | |
| ["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] | |
| ["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] | |
| ["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFD"] |
| > |
| // Not listed above: the supplementary range [#x10000-#xEFFFF] |
| | |
| // With underscore |
| <#PN_CHARS_U: <PN_CHARS_BASE> | "_" > |
| | |
| <#PN_CHARS: (<PN_CHARS_U> | "-" | ["0"-"9"] | "\u00B7" | |
| ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] ) > |
| | |
| // No leading "_", no trailing ".", can have dot inside prefix name. |
| <#PN_PREFIX: <PN_CHARS_BASE> ((<PN_CHARS>|".")* <PN_CHARS>)? > |
| | |
| // Local part: may contain "." but not end with one. |
| <#PN_LOCAL: (<PN_CHARS_U> | ":" | ["0"-"9"] | <PLX> ) |
| ( (<PN_CHARS> | "." |":" | <PLX> )* |
| (<PN_CHARS> | ":" | <PLX>) )? > |
| | |
| // NOTE(review): VARNAME is not referenced anywhere in the visible grammar. |
| <#VARNAME: ( <PN_CHARS_U> | ["0"-"9"] ) |
| ( <PN_CHARS_U> | ["0"-"9"] | "\u00B7" | |
| ["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] )* > |
| | |
| // Backslash escape of one punctuation character inside a local name. |
| < #PN_LOCAL_ESC: "\\" |
| ( "_" | |
| "~" | "." | "-" | "!" | "$" | "&" | "'" | |
| "(" | ")" | "*" | "+" | "," | ";" | "=" | |
| "/" | "?" | "#" | "@" | "%" ) > |
| | |
| // Percent-encoded byte or punctuation escape, as used in PN_LOCAL. |
| <#PLX: <PERCENT> | <PN_LOCAL_ESC> > |
| | |
| < #HEX: ["0"-"9"] | ["A"-"F"] | ["a"-"f"] > |
| | |
| < #PERCENT: "%" <HEX> <HEX> > |
| } |
| |
| // Catch-all tokens. Must be last. |
| // Any non-whitespace. Intended to cause a parser exception, rather than a |
| // token manager error (which hides the line numbers). |
| // NOTE(review): UNKNOWN is declared with "#", which makes it a private helper |
| // pattern that is never produced as a token, so the catch-all described above |
| // does not appear to be active. Simply dropping the "#" is not a safe fix: |
| // as a longest-match pattern it would swallow any real token that is directly |
| // followed by more non-whitespace (e.g. a prefixed name followed by "."). |
| // Confirm the intent before changing this. |
| TOKEN: |
| { |
| <#UNKNOWN: (~[" ","\t","\n","\r","\f" ])+ > |
| } |
| |
| /* |
| # Local Variables: |
| # tab-width: 4 |
| # indent-tabs-mode: nil |
| # comment-default-style: "//" |
| # End: |
| */ |