/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
options
{
JAVA_UNICODE_ESCAPE = false ;
UNICODE_INPUT = true ;
STATIC = false ;
// DEBUG_PARSER = true ;
// DEBUG_TOKEN_MANAGER = true ;
}
PARSER_BEGIN(ShExJavacc)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.shex.parser.javacc;
import static org.apache.jena.shex.sys.SysShex.*;
import org.apache.jena.shex.parser.*;
import org.apache.jena.shex.expressions.*;
import org.apache.jena.graph.*;
import static org.apache.jena.riot.lang.extra.LangParserLib.*;
import java.util.*;
public class ShExJavacc extends ParserShExC
{}
PARSER_END(ShExJavacc)
void UnitShapes(): { }
{
(<BOM>)?
shexDoc()
<EOF>
}
void shexDoc() : {}
{
{ startShexDoc(); }
(directive())*
(
(notStartAction() | startActions())
(statement())*
)?
{ finishShexDoc(); }
}
// ----
void directive() : {}
{
( baseDecl() | prefixDecl() | importDecl() )
}
void baseDecl() : { String iri ; }
{
<BASE> iri = IRIREF()
{ setBase(iri, token.beginLine, token.beginColumn) ; }
}
void prefixDecl() : { Token t ; String iri ; }
{
<PREFIX> t = <PNAME_NS> iri = IRIREF()
{ setPrefix(t.image, iri, t.beginLine, t.beginColumn) ; }
}
void importDecl() : { Token t ; String iri ; }
{
t = <IMPORT> iri = iri()
{ imports(iri, token.beginLine, token.beginColumn) ; }
}
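// Illustrative directives accepted by baseDecl/prefixDecl/importDecl above
// (the IRIs are examples only):
//    BASE   <http://example/base/>
//    PREFIX ex: <http://example/ns#>
//    IMPORT <http://example/other-schema>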
// ----
void notStartAction() : {}
{
start() | shapeExprDecl()
}
void start() : { }
{
{ startStartClause(); }
<START> <EQUALS> inlineShapeExpression()
{ finishStartClause(); }
}
void startActions() : {}
{
(codeDecl())+
}
void statement() : {}
{
directive() | notStartAction()
}
// ----
void shapeExprDecl() : { Node n;}
{
{ startShapeExprDecl(); }
n = shapeExprLabel()
{ shapeExprDecl(n, token.beginLine, token.beginColumn); }
(shapeExpression() | <EXTERNAL> { shapeExternal(); })
{ finishShapeExprDecl(); }
}
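// Illustrative shape expression declarations (example labels and properties):
//    ex:PersonShape { ex:name LITERAL }
//    ex:OtherShape EXTERNAL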
// From: shapeExprDecl
// Recursion: "(" shapeExpression ")" in shapeAtom and inlineShapeAtom(), and via shapeRef.
void shapeExpression() : { int idx; }
{
{ idx = startShapeExpression(); }
shapeOr()
{ finishShapeExpression(idx); }
}
void shapeOr() : { int idx; }
{
{ idx = startShapeOr(); }
shapeAnd() (<OR> shapeAnd())*
{ finishShapeOr(idx); }
}
void shapeAnd() : { int idx; }
{
{ idx = startShapeAnd(); }
shapeNot() (<AND> shapeNot())*
{ finishShapeAnd(idx); }
}
void shapeNot() : { int idx; boolean negate = false; }
{
{ idx = startShapeNot(); }
(<NOT> { negate = true; })? shapeAtom()
{ finishShapeNot(idx,negate); }
}
void shapeAtom() : { int idx; }
{
{ idx = startShapeAtom(); }
// No literals-as-subjects.
( nonLitNodeConstraint() (shapeOrRef())?
| litNodeConstraint()
| shapeOrRef() (nonLitNodeConstraint())?
| <LPAREN> shapeExpression() <RPAREN>
| <DOT> { shapeAtomDOT(); }
)
{ finishShapeAtom(idx); }
}
// void shapeAtomNoRef() : {}
// {
// nonLitNodeConstraint() (shapeOrRef())?
// | litNodeConstraint()
// | shapeDefinition() (nonLitNodeConstraint())?
// | <LPAREN> shapeExpression() <RPAREN>
// | <DOT>
// }
// Inline versions of same.
// From start()
// Recursion: in tripleConstraint
void inlineShapeExpression() : { int idx; }
{
{ idx = startInlineShapeExpression(); }
inlineShapeOr()
{ finishInlineShapeExpression(idx); }
}
void inlineShapeOr() : { int idx; }
{
{ idx = startInlineShapeOr(); }
inlineShapeAnd() (<OR> inlineShapeAnd())*
{ finishInlineShapeOr(idx); }
}
void inlineShapeAnd() : { int idx; }
{
{ idx = startInlineShapeAnd(); }
inlineShapeNot() (<AND> inlineShapeNot())*
{ finishInlineShapeAnd(idx); }
}
void inlineShapeNot() : { int idx; boolean negate = false; }
{
{ idx = startInlineShapeNot(); }
(<NOT> { negate = true; })? inlineShapeAtom()
{ finishInlineShapeNot(idx, negate); }
}
void inlineShapeAtom() : { int idx; }
{
{ idx = startInlineShapeAtom(); }
// No literals-as-subjects.
( nonLitNodeConstraint() (inlineShapeOrRef())?
| litNodeConstraint()
| inlineShapeOrRef() (nonLitNodeConstraint())?
| <LPAREN> shapeExpression() <RPAREN>
| <DOT> { shapeAtomDOT(); }
)
{ finishInlineShapeAtom(idx); }
}
void shapeOrRef() : { }
{
shapeDefinition() | shapeRef()
}
void inlineShapeOrRef() : { }
{
inlineShapeDefinition() | shapeRef()
}
void shapeRef() : { Token t ; Node ref; }
{
( t = <ATPNAME_LN>
{ ref = resolve_AT_PName(t.image, t.beginLine, t.beginColumn) ; }
| t = <ATPNAME_NS>
{ ref = resolve_AT_PName(t.image, t.beginLine, t.beginColumn) ; }
| <AT> ref = shapeExprLabel()
)
{ shapeReference(ref); }
}
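// Illustrative shape references (names are examples only):
//    "@ex:PersonShape", "@:Shape", "@<http://example/Shape>"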
void litNodeConstraint() : { String str; Token t; int idx; } {
{ idx = startLiteralNodeConstraint(token.beginLine, token.beginColumn); }
(
t = <LITERAL>
{ cNodeKind(t.image, t.beginLine, t.beginColumn); }
(xsFacet())*
| str = datatype()
{ cDatatype(str, token.beginLine, token.beginColumn); }
(xsFacet())*
| valueSet()
(xsFacet())*
| (numericFacet())+
)
{ finishLiteralNodeConstraint(idx, token.beginLine, token.beginColumn); }
}
// Check precedence
void nonLitNodeConstraint() : { int idx; } {
{ idx = startNonLiteralNodeConstraint(token.beginLine, token.beginColumn); }
( nonLiteralKind() (stringFacet())*
| (stringFacet())+
)
{ finishNonLiteralNodeConstraint(idx, token.beginLine, token.beginColumn); }
}
void nonLiteralKind() : { Token t; }
{
( t = <IRI> | t = <BNODE> | t = <NONLITERAL> )
{ cNodeKind(t.image, t.beginLine, t.beginColumn); }
}
void xsFacet() : { }
{
stringFacet() | numericFacet()
}
void stringFacet() : { String str; Token t;}
{
( str = stringLength() t = <INTEGER>
{ int len = integer(t.image, t.beginLine, t.beginColumn);
stringFacetLength(str, len);
}
| t = <REGEXP>
{ stringFacetRegex(t.image, t.beginLine, t.beginColumn); }
)
}
String stringLength() : { }
{
( <LENGTH> | <MINLENGTH> | <MAXLENGTH> )
{ return token.image; }
}
void numericFacet() : { String range; Token t; Node numLit; String lenStr; }
{
(
range = numericRange() numLit = numericLiteral()
{ numericFacetRange(range, numLit, token.beginLine, token.beginColumn); }
| lenStr = numericLength() t = <INTEGER>
{ int num = integer(t.image, t.beginLine, t.beginColumn);
numericFacetLength(lenStr, num, token.beginLine, token.beginColumn);
}
)
}
String numericRange() : { Token t; }
{
( t = <MININCLUSIVE> | t = <MINEXCLUSIVE> | t = <MAXINCLUSIVE> | t = <MAXEXCLUSIVE> )
{ return t.image; }
}
String numericLength() : { Token t; }
{
( t = <TOTALDIGITS> | t = <FRACTIONDIGITS> )
{ return t.image; }
}
// "{ ... }"
void shapeDefinition() :
{ boolean closed = false ;
TripleExpression tripleExpr = null;
List<Node> extras = new ArrayList<Node>();
}
{
{ startShapeDefinition(); }
(extraPropertySet(extras) | <CLOSED> { closed = true; } )*
<LBRACE> (tripleExpr = tripleExpression())? <RBRACE>
(annotation())*
semanticActions()
{ finishShapeDefinition(tripleExpr, extras, closed); }
}
void inlineShapeDefinition() :
{ boolean closed = false ;
TripleExpression tripleExpr = null;
List<Node> extras = new ArrayList<Node>();
}
{
{ startShapeDefinition(); }
( extraPropertySet(extras) | <CLOSED> { closed = true; } )*
<LBRACE> (tripleExpr = tripleExpression())? <RBRACE>
{ finishShapeDefinition(tripleExpr, extras, closed); }
}
void extraPropertySet(List<Node> extras) : { Node p; }
{
<EXTRA> (p = predicate() { extras.add(p); })+
}
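// Illustrative shape definition using the modifiers above (example properties):
//    EXTRA ex:knows CLOSED { ex:name LITERAL ; ex:age . }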
// Original:
// void tripleExpression() : {}
// {
// oneOfTripleExpr()
// }
//
// void oneOfTripleExpr() : {}
// {
// (LOOKAHEAD(2)
// groupTripleExpr()
// | multiElementOneOf()
// )
// }
//
// void multiElementOneOf() : {}
// {
// groupTripleExpr() (<VBAR> groupTripleExpr())+
// }
//
// void groupTripleExpr() : {}
// {
// ( LOOKAHEAD(2)
// singleElementGroup()
// | multiElementGroup()
// )
// }
//
// void singleElementGroup() : {}
// {
// unaryTripleExpr() (<SEMI_COLON>)?
// }
//
// void multiElementGroup() : {}
// {
// unaryTripleExpr()
// (LOOKAHEAD(2) <SEMI_COLON> unaryTripleExpr())+
// (<SEMI_COLON>)?
// }
// ---- Improvement for LL(1)
TripleExpression tripleExpression() : { int idx; }
{
{ idx = startTripleExpression(); }
tripleExpressionClause() (<VBAR> tripleExpressionClause())*
{ return finishTripleExpression(idx); }
}
void tripleExpressionClause() : { int idx; } {
{ idx = startTripleExpressionClause(); }
tripleExpressionClause_1()
{ finishTripleExpressionClause(idx); }
}
// // Can end in single ";"
// void tripleExpressionClause_0() : {}
// {
// unaryTripleExpr()
// // Allows ";;;"
// (<SEMI_COLON> ( unaryTripleExpr() )?)*
// }
// Iterative, but needs LOOKAHEAD(2)
void tripleExpressionClause_1() : { }
{
unaryTripleExpr()
( LOOKAHEAD(2)
<SEMI_COLON> unaryTripleExpr()
)*
(<SEMI_COLON>)?
}
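// Illustrative triple expression as grouped by the productions above
// (";" separates constraints, "|" separates alternatives, a trailing ";" is allowed):
//    ex:name LITERAL ; ex:age . ; | ex:label LITERAL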
// // Recursive
// void tripleExpressionClause_2() : { }
// {
// unaryTripleExpr() (<SEMI_COLON> (tripleExpressionClause() )? )?
// }
// ----
void unaryTripleExpr() : { Node n = null; }
{
{ startUnaryTripleExpr(); }
(
( (<DOLLAR> n = tripleExprLabel())? (tripleConstraint(n) | bracketedTripleExpr(n)) )
| include()
)
{ finishUnaryTripleExpr(); }
}
void bracketedTripleExpr(Node label): { TripleExpression tripleExpr = null; Cardinality cardinality = null; }
{
{ startBracketedTripleExpr(); }
<LPAREN> tripleExpr = tripleExpression() <RPAREN>
(cardinality = cardinality())?
(annotation())*
semanticActions()
{ finishBracketedTripleExpr(label, tripleExpr, cardinality); }
}
void tripleConstraint(Node label) : { Node p; int idx; boolean reverse = false; Cardinality cardinality = null; }
{
{ idx = startTripleConstraint(); }
( reverse = senseFlags() )?
p = predicate()
inlineShapeExpression()
(cardinality = cardinality())?
(annotation())*
semanticActions()
{ finishTripleConstraint(label, idx, p, reverse, cardinality); }
}
Cardinality cardinality() : {}
{
( <STAR> | <PLUS>| <QMARK> | <REPEAT_RANGE> )
{ return cardinalityRange(token.image, token.beginLine, token.beginColumn); }
}
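// Illustrative cardinalities: "*", "+", "?", and repeat ranges "{2}", "{2,}", "{2,5}", "{2,*}".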
boolean senseFlags() : {}
{
<CARAT>
{ return true; }
}
void valueSet() : {}
{
{ startValueSet(); }
<LBRACKET> (valueSetValue())* <RBRACKET>
{ finishValueSet(); }
}
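// Illustrative value set (an IRI stem with an exclusion, a literal, and a language tag;
// all IRIs and values are examples only):
//    [ <http://example/tree>~ - <http://example/treehouse> "green" @en ]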
// in https://github.com/shexSpec/grammar/blob/master/ShExDoc.g4
// valueSetValue : iriRange
// | literalRange
// | languageRange
// | '.' (iriExclusion+ | literalExclusion+ | languageExclusion+)
// ;
// void valueSetValue() : {}
// {
// iriRange() | literalRange() | languageRange() | (LOOKAHEAD(2) exclusion())+
// }
//
// void exclusion() : {}
// {
// <MINUS>
// (
// iri() | literal() | <LANGTAG>
// ) (<TILDE>)?
// }
void valueSetValue() : {}
{
{ startValueSetValue(); }
( iriRange()
| literalRange()
| languageRange()
| ( <DOT> { startValueSetValueDot(); } (valueExclusion())+ { finishValueSetValueDot(); } )
)
{ finishValueSetValue(); }
}
void valueExclusion() : { String iriStr = null; String lang = null; Node lit = null; boolean isStem = false; }
{
<MINUS>
{ startValueExclusion(); }
( iriStr = iri()
| lit = literal()
| <LANGTAG> { lang = token.image; }
)
(<TILDE> { isStem = true ;})?
{ finishValueExclusion(iriStr, lang, lit, isStem); }
}
// void iriRange() : {}
// {
// iri() (<TILDE> ( iriExclusion())*)?
// }
//
// void iriExclusion() : {}
// {
// <MINUS> iri() (<TILDE>)?
// }
void iriRange() : { String iriStr; boolean seenTilde = false; boolean haveSet = false; }
{
// iri() (<TILDE> ( <MINUS> iri() (<TILDE>)? )* )?
{ startIriRange(); }
iriStr = iri()
( <TILDE> { seenTilde = true; }
{ valueSetIriRange(iriStr, seenTilde); haveSet = true; }
( <MINUS>
{ iriStr = null; seenTilde = false; }
iriStr = iri()
(<TILDE> { seenTilde = true; })?
{ exclusionIriRange(iriStr, seenTilde); }
)*
)?
{ if ( ! haveSet ) { valueSetIriRange(iriStr, false); }
finishIriRange(); }
}
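// e.g. "ex:ns~ - ex:nsInternal~" : a stem with an excluded sub-stem (illustrative names).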
// void literalRange() : {}
// {
// literal() (<TILDE> ( literalExclusion() )* )?
// }
//
// void literalExclusion() : {}
// {
// <MINUS> literal() (<TILDE>)?
// }
void literalRange() : { Node lit; boolean seenTilde = false; boolean haveSet = false; }
{
// literal() (<TILDE> ( <MINUS> literal() (<TILDE>)? )* )?
{ startLiteralRange(); }
lit = literal()
( <TILDE> { seenTilde = true; }
{ valueSetLiteralRange(lit, seenTilde); haveSet = true; }
( <MINUS>
{ lit = null; seenTilde = false; }
lit = literal()
(<TILDE> { seenTilde = true; })?
{ exclusionLiteralRange(lit, seenTilde); }
)*
)?
{ if ( ! haveSet ) { valueSetLiteralRange(lit, false); }
finishLiteralRange();
}
}
// void languageRange() : {}
// {
// (
// <LANGTAG> (<TILDE> ( languageExclusion() )* )?
// |
// <AT> <TILDE> ( languageExclusion() )*
// )
// }
//
// void languageExclusion() : {}
// {
// <MINUS> <LANGTAG> (<TILDE>)?
// }
// Better as original?
void languageRange() : { }
{
{ startLanguageRange(); }
(langRangeLANGTAG() | langRangeAT() )
{ finishLanguageRange(); }
}
void langRangeLANGTAG() : { String lang = null; boolean seenTilde = false; boolean haveSet = false; }
{
<LANGTAG> { lang = token.image; }
( <TILDE> { seenTilde = true; }
{ valueSetLanguageRange(lang, seenTilde); haveSet = true; }
( <MINUS>
{ lang = null; seenTilde = false; }
<LANGTAG> { lang = token.image; }
(<TILDE> { seenTilde = true; })?
{ exclusionLanguageRange(lang, seenTilde); }
)*
)?
{ if ( ! haveSet ) valueSetLanguageRange(lang, false);
}
}
void langRangeAT() : { boolean haveSet = false; String lang = null; boolean seenTilde = false;}
{
<AT>
<TILDE>
{ valueSetLanguageRange("@", true); }
( <MINUS>
<LANGTAG> { lang = token.image; }
(<TILDE> { seenTilde = true; })?
{ exclusionLanguageRange(lang, seenTilde); }
)*
}
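// Illustrative language ranges: "@en~ - @en-GB" (LANGTAG stem with an exclusion)
// and "@~ - @fr" (the <AT><TILDE> form with an exclusion).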
void include() : { Node n = null; }
{
<AMP> n = tripleExprLabel()
{ ampTripleExprLabel(n); }
}
void annotation() : {}
{
"//" predicate() (iri() | literal())
}
void semanticActions() : {}
{
(codeDecl())*
}
void codeDecl() : {}
{
// Spec:
// <CODE> ::= "{" ([^%\\] | "\\" [%\\] | UCHAR)* "%" "}"
// <PERCENT_CHAR> iri() (<CODE> | <PERCENT_CHAR> )
// Avoid the <CODE> token, which causes a lot of backtracking.
// It starts with "{", which is also used for blocks, and it is only the
// closing "%}" that decides between <CODE> and <LBRACE>.
// <CODE_BLOCK> captures this as a single token introduced with "%".
<CODE_BLOCK>
// Alternative to try: <PERCENT_CHAR> and a token state change.
}
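// Illustrative semantic actions matched by <CODE_BLOCK> (the prefixed name is an example):
//    %ex:check{ print(o); %}
//    %ex:noop%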
Node literal() : { Node n ; }
{
( n = rdfLiteral() | n = numericLiteral() | n = booleanLiteral() )
{ return n; }
}
Node predicate() : { String s; }
{
( s = iri() { return createURI(s, token.beginLine, token.beginColumn); }
| <RDF_TYPE> { return nRDFtype; }
)
}
Node shapeExprLabel() : { Node n ; }
{
//iri() | blankNode()
n = _label() { return n ; }
}
Node tripleExprLabel() : { Node n; }
{
//iri() | blankNode()
n = _label() { return n ; }
}
Node _label() : { String n; }
{
( n = iri()
{ return createURI(n, token.beginLine, token.beginColumn); }
| n = blankNode()
{ return createBNode(n, token.beginLine, token.beginColumn); }
)
}
// ------------------------- Terms
Node booleanLiteral():{}
{
<TRUE> { return XSD_TRUE; }
|
<FALSE> { return XSD_FALSE; }
}
Node numericLiteral():{ Token t; }
{
( t = <INTEGER>
{ return createLiteralInteger(t.image, token.beginLine, token.beginColumn) ; }
| t = <DECIMAL>
{ return createLiteralDecimal(t.image, token.beginLine, token.beginColumn) ; }
| t = <DOUBLE>
{ return createLiteralDouble(t.image, token.beginLine, token.beginColumn) ; }
)
}
Node rdfLiteral() : { Node n; Token t; String lex; String dt = null; }
{
n = langString() { return n; }
|
lex = string()
{ t = token; }
("^^" dt = datatype())?
{ return createLiteral(lex, null, dt, t.beginLine, t.beginColumn) ; }
}
String datatype():{ String s; }
{
s = iri() { return s; }
}
String string() : { Token t ; String lex ; }
{
( t = <STRING_LITERAL1> { lex = stripQuotes(t.image) ; }
| t = <STRING_LITERAL2> { lex = stripQuotes(t.image) ; }
| t = <STRING_LITERAL_LONG1> { lex = stripQuotes3(t.image) ; }
| t = <STRING_LITERAL_LONG2> { lex = stripQuotes3(t.image) ; }
)
{ return unescapeStr(lex, t.beginLine, t.beginColumn) ; }
}
Node langString() : { Token t; Node n; }
{
t = <LANG_STRING_LITERAL1> { return langStringLiteral(1, t.image, t.beginLine, t.beginColumn); }
| t = <LANG_STRING_LITERAL2> { return langStringLiteral(1, t.image, t.beginLine, t.beginColumn); }
| t = <LANG_STRING_LITERAL_LONG2> { return langStringLiteral(3, t.image, t.beginLine, t.beginColumn); }
| t = <LANG_STRING_LITERAL_LONG1> { return langStringLiteral(3, t.image, t.beginLine, t.beginColumn); }
}
String iri() : { String iri = null; }
{
iri = IRIREF() { return iri ; }
|
iri = prefixedName() { return iri ; }
}
String blankNode() : {}
{
<BLANK_NODE_LABEL>
{ return token.image; }
}
String prefixedName() : { Token t ; }
{
( t = <PNAME_LN>
{ return resolvePName(t.image, t.beginLine, t.beginColumn) ; }
|
t = <PNAME_NS>
{ return resolvePName(t.image, t.beginLine, t.beginColumn) ; }
)
}
String IRIREF() : { Token t ; }
{
t = <IRIref>
{ return resolveQuotedIRI(t.image, t.beginLine, t.beginColumn) ; }
}
// ---- Shape Map
void UnitShapeMap(): { }
{
(<BOM>)?
shexMapDoc()
<EOF>
}
void shexMapDoc() : {}
{
// Extension
( directive() )*
shapeMap()
}
void shapeMap() : {}
{
shapeAssociation()
//(<COMMA> shapeAssociation() )*
((<COMMA>)? shapeAssociation())*
}
void shapeAssociation() : { Node n = null ; Triple t = null ; Node label; }
{
//nodeSelector()
// Inline to return either kind.
( n = objectTerm() | t = triplePattern() )
label = shapeSpec()
{ association(n, t, label); }
// Extension.
(<DOT>)?
}
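// Illustrative shape map entries (IRIs are examples only):
//    <http://example/alice>@<http://example/PersonShape>
//    {FOCUS <http://example/knows> _}@START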
// void nodeSelector() : { Node n; Triple t; }
// {
// ( n = objectTerm() { shapeAssociation(n); }
// | t = triplePattern() { shapeAssociation(t); }
// )
// }
Node subjectTerm() : { String s; }
{
( s = iri()
{ return createURI(s, token.beginLine, token.beginColumn); }
| s = blankNode()
{ return createBNode(s, token.beginLine, token.beginColumn); }
)
}
Node objectTerm() : { Node n; }
{
( n = subjectTerm() | n = literal() )
{ return n ; }
}
Triple triplePattern() : { Node s = null; Node p = null ; Node o = null ; Token tok; }
{
tok = <LBRACE>
(
<FOCUS> { s = focusNode; } p = predicate() ( o = objectTerm() | <USCORE> { o = Node.ANY; } )
|
( s = subjectTerm() | <USCORE> { s = Node.ANY; } ) p = predicate() <FOCUS> { o = focusNode;}
)
<RBRACE>
{ return createTriple(s, p, o, tok.beginLine, tok.beginColumn); }
}
Node shapeSpec() : { String x; }
{
( <AT> ( x = iri() { return createURI(x, token.beginLine, token.beginColumn); }
| <START> { return startNode; }
)
) | <ATSTART> { return startNode; }
}
// ----
SKIP : { " " | "\t" | "\n" | "\r" | "\f" }
// SPECIAL_TOKEN :
// { <SINGLE_LINE_COMMENT: "#" (~["\n","\r"])* ("\n"|"\r"|"\r\n")? > }
// Comments.
SKIP :
{
< "#" (~["\r", "\n"])* >
| < "/*" > : ML_COMMENT_STATE
}
<ML_COMMENT_STATE> SKIP :
{
< "*/" > : DEFAULT
| < ~[] >
}
// // C-style comments (they don't nest: "/* /*...*/ */" is a syntax error)
// // When a /* is seen in the DEFAULT state, skip it and switch to the IN_COMMENT state
// SKIP : { "/*": IN_COMMENT }
//
// // When any other character is seen in the IN_COMMENT state, skip it.
// < IN_COMMENT > SKIP : { < ~[] > }
//
// // When a */ is seen in the IN_COMMENT state, skip it and switch back to the DEFAULT state
// < IN_COMMENT > SKIP : { "*/": DEFAULT }
TOKEN: {
<BOM: "\uFEFF">
| <RDF_TYPE : "a" >
}
TOKEN [IGNORE_CASE] :
{
// Keywords
<BASE : "BASE" >
| <IMPORT : "IMPORT">
| <PREFIX : "PREFIX">
| <SHAPE_CLASS : "shapeClass">
| <SHAPE : "shape">
| <START : "start">
| <EXTERNAL : "external">
| <FOCUS : "focus">
| <NOT : "not">
| <OR : "or" >
| <AND : "and" >
| <LITERAL : "literal">
| <IRI : "iri" >
| <BNODE : "bnode" >
| <NONLITERAL : "nonliteral" >
| <LENGTH : "length" >
| <MINLENGTH : "minlength" >
| <MAXLENGTH : "maxlength" >
| <MININCLUSIVE : "mininclusive" >
| <MINEXCLUSIVE : "minexclusive" >
| <MAXINCLUSIVE : "maxinclusive" >
| <MAXEXCLUSIVE : "maxexclusive" >
| <TOTALDIGITS : "totaldigits" >
| <FRACTIONDIGITS : "fractiondigits" >
| <CLOSED : "closed" >
| <EXTRA : "extra" >
// Works but slow.
//| <CODE : "{" (~[ "%","\\"] | "\\" ["%","\\"] )* "%" "}" >
| <CODE_BLOCK : <PERCENT_CHAR>
(" ")*
(<IRIref>|<PNAME_LN>|<PNAME_NS>)
(" ")*
( <LBRACE>
( ~[ "%","\\"] | ("\\" ["%","\\"]) | <UCHAR> )*
<PERCENT_CHAR><RBRACE>
| <PERCENT_CHAR> ) >
// Why is this a token?
| <REPEAT_RANGE : <LBRACE> <INTEGER> ( <COMMA> (<INTEGER> | <STAR> )? )? <RBRACE> >
| <TRUE : "true">
| <FALSE : "false">
| <#HEX : ["0"-"9"] | ["A"-"F"] | ["a"-"f"] >
| <PLUS : "+" >
| <MINUS : "-" >
| <VBAR : "|" >
| <AT : "@">
| <CARAT : "^">
| <DOT : ".">
| <BANG : "!">
| <QMARK : "?">
| <SLASH : "/">
| <STAR : "*">
| <EQUALS : "=">
| <LPAREN : "(" >
| <RPAREN : ")" >
| <LBRACE : "{" >
| <RBRACE : "}" >
| <LBRACKET : "[" >
| <RBRACKET : "]" >
| <PERCENT_CHAR : "%" >
| <COMMA : "," >
| <USCORE : "_" >
// Breaks PNAME_NS
// | <COLON : ":" >
| <SEMI_COLON : ";" >
| <DOLLAR : "$" >
| <TILDE : "~" >
| <AMP : "&" >
| <#UCHAR : "\\" ( "u" <HEX> <HEX> <HEX> <HEX> |
"U" <HEX> <HEX> <HEX> <HEX> <HEX> <HEX> <HEX> <HEX>
) >
| <IRIref : "<"
( ~[ ">","<", "\"", "{", "}", "^", "\\", "|", "`","\u0000"-"\u0020"]
| <UCHAR>
)*
">" >
| <PNAME_NS : (<PN_PREFIX>)? ":" >
| <PNAME_LN : <PNAME_NS> <PN_LOCAL> >
| <ATPNAME_NS : <AT><PNAME_NS> >
| <ATPNAME_LN : <AT><PNAME_LN> >
| <ATSTART : <AT><START> >
| <#QUOTE_3D: "\"\"\"">
| <#QUOTE_3S: "'''">
| <ECHAR : "\\" ( "t"|"b"|"n"|"r"|"f"|"\\"|"\""|"'") >
| <STRING_LITERAL1:
// Single quoted string
"'" ( (~["'","\\","\n","\r"]) | <ECHAR> | <UCHAR> )* "'" >
| <STRING_LITERAL2:
// Double quoted string
"\"" ( (~["\"","\\","\n","\r"]) | <ECHAR> | <UCHAR> )* "\"" >
| <STRING_LITERAL_LONG1:
<QUOTE_3S>
( ("'" | "''")? (~["'","\\"] | <ECHAR> | <UCHAR> ))*
<QUOTE_3S> >
| <STRING_LITERAL_LONG2:
<QUOTE_3D>
( ("\"" | "\"\"")? (~["\"","\\"] | <ECHAR> | <UCHAR>))*
<QUOTE_3D> >
| <LANG_STRING_LITERAL1 : <STRING_LITERAL1> <LANGTAG> >
| <LANG_STRING_LITERAL2 : <STRING_LITERAL2> <LANGTAG> >
| <LANG_STRING_LITERAL_LONG1 : <STRING_LITERAL_LONG1> <LANGTAG> >
| <LANG_STRING_LITERAL_LONG2 : <STRING_LITERAL_LONG2> <LANGTAG> >
| <#DIGITS : (["0"-"9"])+>
| <INTEGER : (<PLUS>|<MINUS>)? <DIGITS> >
| <DECIMAL : (<PLUS>|<MINUS>)? (<DIGITS>)? "." <DIGITS> >
| <DOUBLE : (<PLUS>|<MINUS>)?
( (["0"-"9"])+ "." (["0"-"9"])* <EXPONENT>
| "." (["0"-"9"])+ (<EXPONENT>)
| (["0"-"9"])+ <EXPONENT>
) >
| <#EXPONENT : ["e","E"] (["+","-"])? (["0"-"9"])+ >
| <REGEXP : <SLASH>
( ~["/","\\","\n", "\r"]
| "\\" [ "n", "r", "t", "\\", "|", "." , "?", "*", "+",
"(", ")", "{", "}", "$", "-", "[", "]", "^", "/" ]
| <UCHAR>
)+ <SLASH> (["s","m","i","x"])*
>
| <BLANK_NODE_LABEL: "_:" (<PN_CHARS_U> | ["0"-"9"]) ((<PN_CHARS> | ".")* <PN_CHARS>)? >
| <LANGTAG: <AT> (<A2Z>)+("-" (<A2ZN>)+)* >
| <#A2Z: ["a"-"z","A"-"Z"]>
| <#A2ZN: ["a"-"z","A"-"Z","0"-"9"]>
| <#PN_CHARS_BASE:
["A"-"Z"] | ["a"-"z"] |
["\u00C0"-"\u00D6"] | ["\u00D8"-"\u00F6"] | ["\u00F8"-"\u02FF"] |
["\u0370"-"\u037D"] | ["\u037F"-"\u1FFF"] |
["\u200C"-"\u200D"] | ["\u2070"-"\u218F"] | ["\u2C00"-"\u2FEF"] |
["\u3001"-"\uD7FF"] | ["\uF900"-"\uFFFD"]
>
// [#x10000-#xEFFFF]
|
// With underscore
<#PN_CHARS_U: <PN_CHARS_BASE> | "_" >
|
<#PN_CHARS: (<PN_CHARS_U> | "-" | ["0"-"9"] | "\u00B7" |
["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] ) >
|
// No leading "_", no trailing ".", can have dot inside prefix name.
<#PN_PREFIX: <PN_CHARS_BASE> ((<PN_CHARS>|".")* <PN_CHARS>)? >
|
<#PN_LOCAL: (<PN_CHARS_U> | ":" | ["0"-"9"] | <PLX> )
( (<PN_CHARS> | "." |":" | <PLX> )*
(<PN_CHARS> | ":" | <PLX>) )? >
|
<#VARNAME: ( <PN_CHARS_U> | ["0"-"9"] )
( <PN_CHARS_U> | ["0"-"9"] | "\u00B7" |
["\u0300"-"\u036F"] | ["\u203F"-"\u2040"] )* >
|
< #PN_LOCAL_ESC: "\\"
( "_" |
"~" | "." | "-" | "!" | "$" | "&" | "'" |
"(" | ")" | "*" | "+" | "," | ";" | "=" |
"/" | "?" | "#" | "@" | "%" ) >
|
<#PLX: <PERCENT> | <PN_LOCAL_ESC> >
|
<#PERCENT : "%" <HEX> <HEX> >
}
// Catch-all tokens. Must be last.
// Any non-whitespace. Causes a parser exception, rather than a
// token manager error (which hides the line numbers).
TOKEN:
{
<#UNKNOWN: (~[" ","\t","\n","\r","\f" ])+ >
}
/*
# Local Variables:
# tab-width: 4
# indent-tabs-mode: nil
# comment-default-style: "//"
# End:
*/