// blob: 437cf739a4590603d0921fc069cd660a919ee2bd [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
// Turtle RDF 1.1 Grammar.
// This is not the usual parser used by RIOT.
// RIOT has its own tokenizer/parser, which is faster.
// This grammar exists as a more convenient base for new languages.
// JavaCC generator options for this grammar.
options
{
// Disabled alternative: have the input stream process backslash-u escapes.
// // \ u processed in the input stream
// JAVA_UNICODE_ESCAPE = true ;
// UNICODE_INPUT = false ;
// And Fix ECHAR
// Setting in use: the character stream leaves backslash-u escapes alone;
// \ u processed after parsing.
// strings, prefix names, IRIs
JAVA_UNICODE_ESCAPE = false ;
UNICODE_INPUT = true ;
// Generate instance (non-static) parser and token manager members.
STATIC = false ;
// Uncomment to have the generated parser / token manager trace their actions.
// DEBUG_PARSER = true ;
// DEBUG_TOKEN_MANAGER = true ;
}
PARSER_BEGIN(TurtleJavacc)
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.riot.lang.extra.javacc;
import org.apache.jena.graph.*;
import org.apache.jena.riot.lang.extra.* ;
// Parser class that JavaCC fills in with one method per grammar production below.
// The class body is empty here: the helpers and constants used by the grammar
// actions (createNode, emitTriple, setPrefix, nRDFtype, ...) are not defined in
// this file - presumably they are inherited from TurtleParserBase; verify there.
public class TurtleJavacc extends TurtleParserBase
{}
PARSER_END(TurtleJavacc)
// Entry point
// Parses a whole document: an optional byte order mark, then any number of
// statements, and finally end of input.
void parse() : {}
{
    ByteOrderMark()
    ( Statement() )*
    <EOF>
}
// Consumes a <BOM> token if one is present; matches nothing otherwise.
void ByteOrderMark() : {}
{
    [ <BOM> ]
}
// One top-level statement: a keyword-form directive (PREFIX / BASE), an
// "@"-form directive (@prefix / @base), or a block of triples ended by a DOT.
void Statement() : {}
{
    (
        Directive()
      |
        DirectiveOld()
      |
        // Strict: the terminating DOT is mandatory.
        // A lenient form would be: (TriplesSameSubject() (<DOT> | <EOF> ) )
        TriplesSameSubject() <DOT>
    )
}
// Keyword-form directives, written without a terminating DOT:
//   <PREFIX> <PNAME_NS> <IRIref>   - registers a prefix via setPrefix
//   <BASE> <IRIref>                - sets the base via setBase
void Directive() : { Token tok ; String iriStr ; }
{
    (
        <PREFIX> tok = <PNAME_NS> iriStr = IRIREF()
        {
            // Line/column passed on are those of the prefix-name token.
            String prefix = fixupPrefix(tok.image, tok.beginLine, tok.beginColumn) ;
            setPrefix(tok.beginLine, tok.beginColumn, prefix, iriStr) ;
        }
      |
        tok = <BASE> iriStr = IRIREF()
        {
            // Line/column passed on are those of the BASE keyword.
            setBase(iriStr, tok.beginLine, tok.beginColumn) ;
        }
    )
}
// "@"-form directives, each terminated by a DOT:
//   <PREFIX_OLD> <PNAME_NS> <IRIref> <DOT>   - registers a prefix via setPrefix
//   <BASE_OLD> <IRIref> <DOT>                - sets the base via setBase
void DirectiveOld() : { Token tok ; String iriStr ; }
{
    (
        <PREFIX_OLD> tok = <PNAME_NS> iriStr = IRIREF() <DOT>
        {
            // Line/column passed on are those of the prefix-name token.
            String prefix = fixupPrefix(tok.image, tok.beginLine, tok.beginColumn) ;
            setPrefix(tok.beginLine, tok.beginColumn, prefix, iriStr) ;
        }
      |
        tok = <BASE_OLD> iriStr = IRIREF() <DOT>
        {
            // Line/column passed on are those of the @base keyword.
            setBase(iriStr, tok.beginLine, tok.beginColumn) ;
        }
    )
}
// Matches the "@prefix" / "@base" keyword tokens and returns the token.
//
// These inputs have the same shape as a language tag ("@" followed by letters).
// <PREFIX_OLD> and <BASE_OLD> are declared before <LANGTAG>, so for the inputs
// "@prefix" and "@base" the token manager reports the keyword token, never
// <LANGTAG>. LangTag() uses this production so that literals such as
// "abc"@base are still accepted; it then drops the first character of the image.
//
// The "@"-form tokens are the ones needed here: <PREFIX> / <BASE> have the
// images "prefix" / "base" with no leading "@", so they can never be what
// follows a string in a language-tagged literal.
Token AnyDirective() : { Token t ; }
{
    ( t = <PREFIX_OLD> | t = <BASE_OLD> ) { return t ; }
}
// Triples sharing one subject.
// A plain subject (blank node or IRI) must be followed by at least one
// predicate/object pair; a subject that itself expands to triples
// (collection or [...] property list) may stand alone.
void TriplesSameSubject() : { Node subject ; }
{
    (
        subject = SubjectNode()
        PropertyListNotEmpty(subject)
      |
        subject = TriplesNode()
        PropertyList(subject)
    )
}
// Optional predicate/object list for subject s.
void PropertyList(Node s) : {}
{
    [ PropertyListNotEmpty(s) ]
}
// One or more "verb objectList" groups for subject s, separated by SEMICOLON.
// A SEMICOLON may be followed by nothing, so trailing and repeated
// semicolons are accepted.
void PropertyListNotEmpty(Node s) : { Node predicate ; }
{
    predicate = Verb()
    ObjectList(s, predicate)
    (
        <SEMICOLON>
        [ predicate = Verb() ObjectList(s, predicate) ]
    )*
}
// One or more objects for the pair (s, p), separated by COMMA.
// Each object is handled (and its triple emitted) by Object(s, p).
// The unused local "Node o" was dropped; this production never binds an object itself.
void ObjectList(Node s, Node p) : {}
{
    Object(s, p)
    ( <COMMA> Object(s, p) )*
}
// Parses one object node and emits the triple (s, p, object).
// "token" is the parser's most recently consumed token, so the reported
// line/column are those of the last token of the object.
void Object(Node s, Node p) : { Node obj ; }
{
    obj = GraphNode()
    { emitTriple(token.beginLine, token.beginColumn, s, p, obj) ; }
}
// Predicate position: an IRI (turned into a node with createNode) or the
// keyword "a", which yields nRDFtype.
Node Verb() : { String iriStr ; }
{
    iriStr = iri()
    { return createNode(iriStr) ; }
  |
    <KW_A>
    { return nRDFtype ; }
}
// -------- Triple expansions
// Anything that can stand in a node slot and which
// expands to a number of triples.
// A node that is written as a structure and generates triples of its own:
// a collection "( ... )" or a blank node property list "[ ... ]".
// Returns the node produced by the chosen alternative.
Node TriplesNode() : { Node node ; }
{
    (
        node = Collection()
      |
        node = BlankNodePropertyList()
      // |
      //   node = TripleStar()
    )
    { return node ; }
}
// "[ predicateObjectList ]": creates a blank node at the position of the
// opening bracket, parses a non-empty property list with that node as the
// subject, and returns the node.
Node BlankNodePropertyList() : { Token open ; Node bnode ; }
{
    open = <LBRACKET>
    { bnode = createBNode(open.beginLine, open.beginColumn) ; }
    PropertyListNotEmpty(bnode)
    <RBRACKET>
    { return bnode ; }
}
// The syntax for RDF*
//Node TripleStar() :
// { Node s , p , o ; Token t ; }
// {
// t = "<<"
// { int beginLine = t.beginLine; int beginColumn = t.beginColumn; t = null; }
// id = createTripleId(beginLine, beginColumn() ;
// }
// s = GraphNode()
// p = GraphNode()
// o = GraphNode()
// ">>"
// { Node n = tripleStar(s, p, o);
// return n;
// }
// }
//
// ------- RDF collections
// Ordering?
// "( item item ... )" with at least one item.
//
// For every item a list cell is created with createListNode, linked from the
// previous cell with nRDFrest, and given the item as its nRDFfirst value.
// After the closing parenthesis the last cell is linked to nRDFnil.
// Every call made here is given the line/column of the opening parenthesis.
// Returns the first cell.
//
// The unused local "int mark" was removed; the sequence of calls is unchanged.
Node Collection() :
{ Node listHead = nRDFnil ; Node lastCell = null ; Node n ; Token t ; }
{
    t = <LPAREN>
    { int line = t.beginLine; int column = t.beginColumn;
      listStart(line, column);
    }
    (
        { Node cell = createListNode(line, column) ;
          // The first cell created becomes the head of the list.
          if ( listHead == nRDFnil )
              listHead = cell ;
          // Link the previous cell to this one before parsing the item.
          if ( lastCell != null )
              listTriple(line, column, lastCell, nRDFrest, cell) ;
        }
        n = GraphNode()
        {
          listTriple(line, column, cell, nRDFfirst, n) ;
          lastCell = cell ;
        }
    ) +
    // Not * here - "()" is handled separately (it is the single <NIL> token).
    <RPAREN>
    { // Terminate the list.
      if ( lastCell != null )
          listTriple(line, column, lastCell, nRDFrest, nRDFnil) ;
      listFinish(line, column);
      return listHead ; }
}
// Subject position for a plain node: a blank node, or an IRI turned into a
// node with createNode.
Node SubjectNode() : { Node subject ; String iriStr ; }
{
    (
        subject = BlankNode()
      |
        iriStr = iri()
        { subject = createNode(iriStr) ; }
    )
    { return subject ; }
}
// Any node usable as an object or list item: a simple term, or a structure
// (collection / blank node property list) that generates its own triples.
Node GraphNode() : { Node node ; }
{
    (
        node = GraphTerm()
      |
        node = TriplesNode()
    )
    { return node ; }
}
// A single-token-or-literal term: IRI, RDF literal, numeric literal, boolean,
// blank node, or the empty list.
Node GraphTerm() : { Node term ; String iriStr ; }
{
    (
        iriStr = iri()              { term = createNode(iriStr) ; }
      | term = RDFLiteral()
      | term = NumericLiteral()
      | term = BooleanLiteral()
      | term = BlankNode()
        // The empty list is the single <NIL> token, not <LPAREN> <RPAREN>.
      | <NIL>                       { term = nRDFnil ; }
    )
    { return term ; }
}
// A quoted string with an optional language tag or an optional "^^" datatype IRI.
// createLiteral receives the lexical form plus lang and uri; whichever of the
// two was not given in the input is passed as null.
// The unused local "Token t" was removed and lang/uri moved to the declarations.
Node RDFLiteral() : { String lex ; String lang = null ; String uri = null ; }
{
    lex = String()
    // Optional lang tag and datatype.
    (
        lang = LangTag()
      |
        <DATATYPE> uri = iri()
    )?
    { return createLiteral(lex, lang, uri) ; }
}
// Language tag following a string.
// AnyDirective() is accepted as well as <LANGTAG> because directive keywords
// can look like language tags.
// Returns stripChars(image, 1) of the matched token - presumably the image
// without its leading "@"; confirm against stripChars.
String LangTag() : { Token tag ; }
{
    (
        tag = <LANGTAG>
      |
        tag = AnyDirective()
    )
    { return stripChars(tag.image, 1) ; }
}
// A number in unsigned, "+"-prefixed or "-"-prefixed form.
Node NumericLiteral() : { Node num ; }
{
    num = NumericLiteralUnsigned() { return num ; }
  |
    num = NumericLiteralPositive() { return num ; }
  |
    num = NumericLiteralNegative() { return num ; }
}
// Unsigned number: the token kind selects the integer, decimal or double
// literal factory, which is given the token text.
Node NumericLiteralUnsigned() : { Token num ; Node lit ; }
{
    (
        num = <INTEGER> { lit = createLiteralInteger(num.image) ; }
      | num = <DECIMAL> { lit = createLiteralDecimal(num.image) ; }
      | num = <DOUBLE>  { lit = createLiteralDouble(num.image) ; }
    )
    { return lit ; }
}
// "+"-prefixed number: the token kind selects the integer, decimal or double
// literal factory, which is given the token text including the sign.
Node NumericLiteralPositive() : { Token num ; Node lit ; }
{
    (
        num = <INTEGER_POSITIVE> { lit = createLiteralInteger(num.image) ; }
      | num = <DECIMAL_POSITIVE> { lit = createLiteralDecimal(num.image) ; }
      | num = <DOUBLE_POSITIVE>  { lit = createLiteralDouble(num.image) ; }
    )
    { return lit ; }
}
// "-"-prefixed number: the token kind selects the integer, decimal or double
// literal factory, which is given the token text including the sign.
Node NumericLiteralNegative() : { Token num ; Node lit ; }
{
    (
        num = <INTEGER_NEGATIVE> { lit = createLiteralInteger(num.image) ; }
      | num = <DECIMAL_NEGATIVE> { lit = createLiteralDecimal(num.image) ; }
      | num = <DOUBLE_NEGATIVE>  { lit = createLiteralDouble(num.image) ; }
    )
    { return lit ; }
}
// The keywords true / false, mapped to the XSD_TRUE / XSD_FALSE constants.
Node BooleanLiteral() : { Node bool ; }
{
    (
        <TRUE>  { bool = XSD_TRUE ; }
      |
        <FALSE> { bool = XSD_FALSE ; }
    )
    { return bool ; }
}
// Any of the four quoted string forms.
// The delimiters are removed with stripQuotes (short forms) or stripQuotes3
// (long, triple-quoted forms); the result is then passed through unescapeStr
// together with the position of the string token.
String String() : { Token lit ; String body ; }
{
    (
        lit = <STRING_LITERAL1>      { body = stripQuotes(lit.image) ; }
      | lit = <STRING_LITERAL2>      { body = stripQuotes(lit.image) ; }
      | lit = <STRING_LITERAL_LONG1> { body = stripQuotes3(lit.image) ; }
      | lit = <STRING_LITERAL_LONG2> { body = stripQuotes3(lit.image) ; }
    )
    { return unescapeStr(body, lit.beginLine, lit.beginColumn) ; }
}
// An IRI written either as <...> or as a prefixed name; returns the string
// produced by the chosen alternative.
String iri() : { String resolved ; }
{
    (
        resolved = IRIREF()
      |
        resolved = PrefixedName()
    )
    { return resolved ; }
}
// A prefixed name, with a local part (<PNAME_LN>) or without (<PNAME_NS>).
// Either way the token text and position go to resolvePName.
String PrefixedName() : { Token pname ; }
{
    (
        pname = <PNAME_LN>
      |
        pname = <PNAME_NS>
    )
    { return resolvePName(pname.image, pname.beginLine, pname.beginColumn) ; }
}
// A blank node: labelled ("_:x", created from the label text) or anonymous
// ("[]", created from the position only).
Node BlankNode() : { Token tok ; Node bnode ; }
{
    (
        tok = <BLANK_NODE_LABEL>
        { bnode = createBNode(tok.image, tok.beginLine, tok.beginColumn) ; }
      |
        // "[]" is the single <ANON> token, not <LBRACKET> <RBRACKET>.
        tok = <ANON>
        { bnode = createBNode(tok.beginLine, tok.beginColumn) ; }
    )
    { return bnode ; }
}
// An IRI in angle brackets; the token text and position go to resolveQuotedIRI.
String IRIREF() : { Token ref ; }
{
    ref = <IRIref>
    { return resolveQuotedIRI(ref.image, ref.beginLine, ref.beginColumn) ; }
}
// ------------------------------------------
// Tokens
// Whitespace between tokens: matched by the token manager and discarded.
SKIP : { " " | "\t" | "\n" | "\r" | "\f" }
// Private (#) definition of the same whitespace characters, for use inside
// other token definitions (see <WSC>).
TOKEN: { <#WS: " " | "\t" | "\n" | "\r" | "\f"> }
// A comment runs from "#" to the end of the line; the line ending is optional
// so that a comment on the last line of the input is matched too.
// As a SPECIAL_TOKEN it is not delivered to the grammar productions.
SPECIAL_TOKEN :
{ <SINGLE_LINE_COMMENT: "#" (~["\n","\r"])* ("\n"|"\r"|"\r\n")? > }
// Keywords matched exactly as written.
// <PREFIX_OLD> and <BASE_OLD> also fit the <LANGTAG> pattern; because they are
// declared earlier in the file, they are the tokens reported for the inputs
// "@prefix" and "@base".
TOKEN : // Case sensitive tokens.
{
<KW_A: "a" >
| < PREFIX_OLD: "@prefix" >
| < BASE_OLD: "@base" >
}
// Keywords matched in any letter case: the keyword-form directives
// (no leading "@") and the boolean literals.
TOKEN [IGNORE_CASE] :
{
< BASE: "base" >
| < PREFIX: "prefix" >
| < TRUE: "true" >
| < FALSE: "false" >
}
// Main token definitions. Names starting with "#" are private helpers that
// can only be used inside other token definitions.
TOKEN :
{
// Whitespace or a comment; used inside <NIL> and <ANON>.
<#WSC: <WS> | <SINGLE_LINE_COMMENT> >
| < BOM: "\uFEFF">
| < PLUS: "+" >
| < MINUS: "-" >
// ---- Numbers
| < #DIGITS: (["0"-"9"])+>
| < INTEGER: <DIGITS> >
// Digits before the "." are optional; digits after it are required.
| < DECIMAL: (<DIGITS>)? "." <DIGITS> >
| < DOUBLE: // Required exponent.
(
(["0"-"9"])+ "." (["0"-"9"])* <EXPONENT>
| "." (["0"-"9"])+ (<EXPONENT>)
| (["0"-"9"])+ <EXPONENT>
)
>
// Signed numbers are single tokens: the sign followed directly by the number.
| < INTEGER_POSITIVE: <PLUS> <INTEGER> >
| < DECIMAL_POSITIVE: <PLUS> <DECIMAL> >
| < DOUBLE_POSITIVE: <PLUS> <DOUBLE> >
| < INTEGER_NEGATIVE: <MINUS> <INTEGER> >
| < DECIMAL_NEGATIVE: <MINUS> <DECIMAL> >
| < DOUBLE_NEGATIVE: <MINUS> <DOUBLE> >
| < #EXPONENT: ["e","E"] (["+","-"])? (["0"-"9"])+ >
// ---- Strings
| < #QUOTE_3D: "\"\"\"">
| < #QUOTE_3S: "'''">
// Escape inside a string: a <UCHAR> introducer or a backslash plus one of the
// listed characters.
// NOTE(review): ECHAR has no "#", so it is also a token kind in its own right - confirm that is intended.
| < ECHAR: <UCHAR> | "\\" ( "t"|"b"|"n"|"r"|"f"|"\\"|"\""|"'") >
| < STRING_LITERAL1:
// Single quoted string
"'" ( (~["'","\\","\n","\r"]) | <ECHAR> )* "'" >
| < STRING_LITERAL2:
// Double quoted string
"\"" ( (~["\"","\\","\n","\r"]) | <ECHAR> )* "\"" >
// Long strings: up to two consecutive quote characters may appear in the
// body as long as they are followed by a non-quote character or an escape.
| < STRING_LITERAL_LONG1:
<QUOTE_3S>
( ("'" | "''")? (~["'","\\"] | <ECHAR> ))*
<QUOTE_3S> >
| < STRING_LITERAL_LONG2:
<QUOTE_3D>
( ("\"" | "\"\"")? (~["\"","\\"] | <ECHAR> ))*
<QUOTE_3D> >
// ---- Punctuation
| < LPAREN: "(" >
| < RPAREN: ")" >
// All the stuff for NIL is needed just to make a
// single list "() ." as a triple pattern illegal.
// It leads to a lot of extra work.
// Similarly [] (see ANON).
| <NIL: <LPAREN> (<WSC>)* <RPAREN> >
| < LBRACE: "{" >
| < RBRACE: "}" >
| < LBRACKET: "[" >
| < RBRACKET: "]" >
| < ANON: <LBRACKET> (<WSC>)* <RBRACKET> >
| < SEMICOLON: ";" >
| < COMMA: "," >
| < DOT: "." >
| < DATATYPE: "^^">
| < AT: "@">
// ---- IRIs, prefixed names, blank node labels, language tags
// UCHAR matches only the escape introducer (backslash plus "u" or "U"); the
// hex digits that follow are matched as ordinary characters.
| <#UCHAR: "\\" ( "u" | "U" ) >
// The IRIref character class includes # (for relative URIs).
| <IRIref: "<"
(~[ ">","<", "\"", "{", "}", "^", "\\", "|", "`","\u0000"-"\u0020"] | <UCHAR>)*
">" >
| <PNAME_NS: (<PN_PREFIX>)? ":" >
| <PNAME_LN: <PNAME_NS> <PN_LOCAL> >
| <BLANK_NODE_LABEL: "_:" (<PN_CHARS_U> | ["0"-"9"]) ((<PN_CHARS>|".")* <PN_CHARS>)? >
| <LANGTAG: <AT> (<A2Z>)+("-" (<A2ZN>)+)* >
| <#A2Z: ["a"-"z","A"-"Z"]>
| <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]>
// ---- Character classes for names
| <#PN_CHARS_BASE:
["A"-"Z"] | ["a"-"z"] |
["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] |
["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] |
["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] |
["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFD"]
>
// The range [#x10000-#xEFFFF] is not listed in the class above.
|
// With underscore
<#PN_CHARS_U: <PN_CHARS_BASE> | "_" >
|
<#PN_CHARS: (<PN_CHARS_U> | "-" | ["0"-"9"] | "\u00B7" |
["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] ) >
|
// No leading "_", no trailing ".", can have dot inside prefix name.
<#PN_PREFIX: <PN_CHARS_BASE> ((<PN_CHARS>|".")* <PN_CHARS>)? >
|
// Local part. May contain ":" and escapes (<PLX>); "." is allowed inside
// but not as the last character.
<#PN_LOCAL: (<PN_CHARS_U> | ":" | ["0"-"9"] | <PLX> )
( (<PN_CHARS> | "." |":" | <PLX> )*
(<PN_CHARS> | ":" | <PLX>) )? >
|
// Not referenced by any other definition visible in this file.
<#VARNAME: ( <PN_CHARS_U> | ["0"-"9"] )
( <PN_CHARS_U> | ["0"-"9"] | "\u00B7" |
["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] )* >
|
// Backslash escape of a punctuation character inside a local name.
< #PN_LOCAL_ESC: "\\"
( "_" |
"~" | "." | "-" | "!" | "$" | "&" | "'" |
"(" | ")" | "*" | "+" | "," | ";" | "=" |
"/" | "?" | "#" | "@" | "%" ) >
|
// Either kind of escape permitted in a local name.
<#PLX: <PERCENT> | <PN_LOCAL_ESC> >
|
< #HEX: ["0"-"9"] | ["A"-"F"] | ["a"-"f"] >
|
// "%" followed by exactly two hex digits.
< #PERCENT: "%" <HEX> <HEX> >
}
// Catch-all tokens. Must be last.
// Any non-whitespace. Intended to cause a parser exception, rather than a
// token manager error (which hides the line numbers).
// NOTE(review): the "#" prefix makes UNKNOWN a private regular expression, and
// private regular expressions are not matched as tokens, so as written this
// definition appears to have no effect. Dropping the "#" would alter
// tokenization (this pattern can be the longest match for many inputs), so
// confirm the intent before changing it.
TOKEN:
{
<#UNKNOWN: (~[" ","\t","\n","\r","\f" ])+ >
}
/*
# Local Variables:
# tab-width: 4
# indent-tabs-mode: nil
# comment-default-style: "//"
# End:
*/