blob: b0947a37f146e5ff992276800c1f3a3632e9fffb [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.riot.out ;
import java.net.MalformedURLException ;
import org.apache.jena.atlas.io.AWriter ;
import org.apache.jena.atlas.lib.CharSpace ;
import org.apache.jena.atlas.lib.Pair ;
import org.apache.jena.datatypes.xsd.XSDDatatype ;
import org.apache.jena.graph.Node ;
import org.apache.jena.iri.IRI ;
import org.apache.jena.iri.IRIRelativize ;
import org.apache.jena.riot.system.IRIResolver ;
import org.apache.jena.riot.system.PrefixMap ;
import org.apache.jena.riot.system.PrefixMapFactory ;
import static org.apache.jena.riot.system.RiotChars.* ;
/** Node formatter for Turtle using single line strings */
public class NodeFormatterTTL extends NodeFormatterNT
{
private final NodeToLabel nodeToLabel ;
private final PrefixMap prefixMap ;
private final String baseIRI ;
private final IRI iriResolver ;
public NodeFormatterTTL(String baseIRI, PrefixMap prefixMap) {
this(baseIRI, prefixMap, NodeToLabel.createBNodeByLabelEncoded()) ;
}
public NodeFormatterTTL(String baseIRI, PrefixMap prefixMap, NodeToLabel nodeToLabel) {
super(CharSpace.UTF8) ;
this.nodeToLabel = nodeToLabel ;
if ( prefixMap == null )
prefixMap = PrefixMapFactory.create() ;
this.prefixMap = prefixMap ;
this.baseIRI = baseIRI ;
this.iriResolver =
baseIRI != null ? IRIResolver.iriFactory().create(baseIRI) : null ;
}
@Override
public void formatURI(AWriter w, String uriStr) {
Pair<String, String> pName = prefixMap.abbrev(uriStr) ;
// Check if legal
if ( pName != null ) {
// Check legal - need to check its legal, not for illegal.
// The splitter in "abbrev" only has a weak rule.
String prefix = pName.getLeft() ;
String localname = pName.getRight() ;
if ( safePrefixName(prefix, localname) ) {
w.print(prefix) ;
w.print(':') ;
w.print(localname) ;
return ;
}
}
// Attempt base abbreviation.
if ( iriResolver != null ) {
String x = abbrevByBase(uriStr) ;
if ( x != null ) {
w.print('<') ;
w.print(x) ;
w.print('>') ;
return ;
}
}
// else
super.formatURI(w, uriStr) ;
}
static private int relFlags = IRIRelativize.SAMEDOCUMENT | IRIRelativize.CHILD ;
/** Abbreviate the URI */
private String abbrevByBase(String uri) {
IRI rel = iriResolver.relativize(uri, relFlags) ;
String r = null ;
try {
r = rel.toASCIIString() ;
} catch (MalformedURLException ex) {
r = rel.toString() ;
}
return r ;
}
@Override
public void formatBNode(AWriter w, Node n) {
String x = nodeToLabel.get(null, n) ;
w.print(x) ;
}
// From NodeFormatterNT:
// @Override
// public void formatVar(WriterI w, String name)
// @Override
// public void formatLitString(WriterI w, String lex)
// @Override
// public void formatLitLang(WriterI w, String lex, String langTag)
static boolean safePrefixName(String prefix, String localname) {
return safeForPrefix(prefix) && safeForPrefixLocalname(localname) ;
}
// [139s] PNAME_NS ::= PN_PREFIX? ':'
// [140s] PNAME_LN ::= PNAME_NS PN_LOCAL
// [167s] PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)?
// [168s] PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))?
// [163s] PN_CHARS_BASE ::= [A-Z] | [a-z] | [#x00C0-#x00D6] | [#x00D8-#x00F6] | [#x00F8-#x02FF] | [#x0370-#x037D] | [#x037F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
// [164s] PN_CHARS_U ::= PN_CHARS_BASE | '_'
// [166s] PN_CHARS ::= PN_CHARS_U | '-' | [0-9] | #x00B7 | [#x0300-#x036F] | [#x203F-#x2040]
// [169s] PLX ::= PERCENT | PN_LOCAL_ESC
// [170s] PERCENT ::= '%' HEX HEX
// [171s] HEX ::= [0-9] | [A-F] | [a-f]
// [172s] PN_LOCAL_ESC ::= '\' ('_' | '~' | '.' | '-' | '!' | '$' | '&' | "'" | '(' | ')' | '*' | '+' | ',' | ';' | '=' | '/' | '?' | '#' | '@' | '%')
/* private-testing */
static boolean safeForPrefix(String str) {
// PN_PREFIX ::= PN_CHARS_BASE ((PN_CHARS | '.')* PN_CHARS)?
int N = str.length() ;
if ( N == 0 )
return true ;
int idx = 0 ;
idx = skip1_PN_CHARS_BASE(str, idx) ;
if ( idx == -1 )
return false ;
idx = skipAny_PN_CHARS_or_DOT(str, idx, N - 1) ;
if ( idx == -1 )
return false ;
idx = skip1_PN_CHARS(str, idx) ;
if ( idx == -1 )
return false ;
return (idx == N) ;
}
/* private-testing */static boolean safeForPrefixLocalname(String str) {
// PN_LOCAL ::= (PN_CHARS_U | ':' | [0-9] | PLX) ((PN_CHARS | '.' | ':' | PLX)* (PN_CHARS | ':' | PLX))?
// This code does not consider PLX (which is more than one character).
int N = str.length() ;
if ( N == 0 )
return true ;
int idx = 0 ;
idx = skip1_PN_CHARS_U_or_digit_or_COLON(str, idx) ;
if ( idx == -1 )
return false ;
idx = skipAny_PN_CHARS_or_DOT_or_COLON(str, idx, N - 1) ;
if ( idx == -1 )
return false ;
// Final char
idx = skip1_PN_CHARS_or_COLON(str, idx) ;
if ( idx == -1 )
return false ;
// We got to the end.
return (idx == N) ;
}
// ---- Prefix name : prefix part
private static int skip1_PN_CHARS_BASE(String str, int idx) {
char ch = str.charAt(idx) ;
if ( isPNCharsBase(ch) )
return idx + 1 ;
return -1 ;
}
private static int skipAny_PN_CHARS_or_DOT(String str, int idx, int max) {
for (int i = idx; i < max; i++) {
char ch = str.charAt(i) ;
if ( !isPNChars(ch) && ch != '.' )
return i ;
}
return max ;
}
private static int skip1_PN_CHARS(String str, int idx) {
char ch = str.charAt(idx) ;
if ( isPNChars(ch) )
return idx + 1 ;
return -1 ;
}
// ---- Prefix name : local part
private static int skip1_PN_CHARS_U_or_digit_or_COLON(String str, int idx) {
char ch = str.charAt(idx) ;
if ( isPNChars_U(ch) )
return idx + 1 ;
if ( isDigit(ch) )
return idx + 1 ;
if ( ch == ':' )
return idx + 1 ;
return -1 ;
}
private static int skipAny_PN_CHARS_or_DOT_or_COLON(String str, int idx, int max) {
for (int i = idx; i < max; i++) {
char ch = str.charAt(i) ;
if ( !isPNChars(ch) && ch != '.' && ch != ':' )
return i ;
}
return max ;
}
private static int skip1_PN_CHARS_or_COLON(String str, int idx) {
char ch = str.charAt(idx) ;
if ( isPNChars(ch) )
return idx + 1 ;
if ( ch == ':' )
return idx + 1 ;
return -1 ;
}
// ----
private static final String dtDecimal = XSDDatatype.XSDdecimal.getURI() ;
private static final String dtInteger = XSDDatatype.XSDinteger.getURI() ;
private static final String dtDouble = XSDDatatype.XSDdouble.getURI() ;
private static final String dtBoolean = XSDDatatype.XSDboolean.getURI() ;
@Override
public void formatLitDT(AWriter w, String lex, String datatypeURI) {
boolean b = writeLiteralAbbreviated(w, lex, datatypeURI) ;
if ( b ) return ;
writeLiteralLongForm(w, lex, datatypeURI) ;
}
protected void writeLiteralLongForm(AWriter w, String lex, String datatypeURI) {
writeLiteralOneLine(w, lex, datatypeURI);
}
protected void writeLiteralOneLine(AWriter w, String lex, String datatypeURI) {
super.formatLitDT(w, lex, datatypeURI) ;
}
/** Write in a short form, e.g. integer.
* @return True if a short form was output else false.
*/
protected boolean writeLiteralAbbreviated(AWriter w, String lex, String datatypeURI) {
if ( dtDecimal.equals(datatypeURI) ) {
if ( validDecimal(lex) ) {
w.print(lex) ;
return true ;
}
} else if ( dtInteger.equals(datatypeURI) ) {
if ( validInteger(lex) ) {
w.print(lex) ;
return true ;
}
} else if ( dtDouble.equals(datatypeURI) ) {
if ( validDouble(lex) ) {
w.print(lex) ;
return true ;
}
} else if ( dtBoolean.equals(datatypeURI) ) {
// We leave "0" and "1" as-is assumign that if written like that,
// there was a reason.
if ( lex.equals("true") || lex.equals("false") ) {
w.print(lex) ;
return true ;
}
}
return false ;
}
private static boolean validInteger(String lex) {
int N = lex.length() ;
if ( N == 0 )
return false ;
int idx = 0 ;
idx = skipSign(lex, idx) ;
idx = skipDigits(lex, idx) ;
return (idx == N) ;
}
private static boolean validDecimal(String lex) {
// case : In N3, "." illegal, as is "+." and -." but legal in Turtle.
int N = lex.length() ;
if ( N <= 1 )
return false ;
int idx = 0 ;
idx = skipSign(lex, idx) ;
idx = skipDigits(lex, idx) ; // Maybe none.
// DOT required.
if ( idx >= N )
return false ;
char ch = lex.charAt(idx) ;
if ( ch != '.' )
return false ;
idx++ ;
// Digit required.
if ( idx >= N )
return false ;
idx = skipDigits(lex, idx) ;
return (idx == N) ;
}
private static boolean validDouble(String lex) {
int N = lex.length() ;
if ( N == 0 )
return false ;
int idx = 0 ;
// Decimal part (except 12. is legal)
idx = skipSign(lex, idx) ;
int idx2 = skipDigits(lex, idx) ;
boolean initialDigits = (idx != idx2) ;
idx = idx2 ;
// Exponent required.
if ( idx >= N )
return false ;
char ch = lex.charAt(idx) ;
if ( ch == '.' ) {
idx++ ;
if ( idx >= N )
return false ;
idx2 = skipDigits(lex, idx) ;
boolean trailingDigits = (idx != idx2) ;
idx = idx2 ;
if ( idx >= N )
return false ;
if ( !initialDigits && !trailingDigits )
return false ;
}
// "e" or "E"
ch = lex.charAt(idx) ;
if ( ch != 'e' && ch != 'E' )
return false ;
idx++ ;
if ( idx >= N )
return false ;
idx = skipSign(lex, idx) ;
if ( idx >= N )
return false ; // At least one digit.
idx = skipDigits(lex, idx) ;
return (idx == N) ;
}
/**
* Skip digits [0-9] and return the index just after the digits, which may
* be beyond the length of the string. May skip zero.
*/
private static int skipDigits(String str, int start) {
int N = str.length() ;
for (int i = start; i < N; i++) {
char ch = str.charAt(i) ;
if ( ! isDigit(ch) )
return i ;
}
return N ;
}
/** Skip any plus or minus */
private static int skipSign(String str, int idx) {
int N = str.length() ;
char ch = str.charAt(idx) ;
if ( ch == '+' || ch == '-' )
return idx + 1 ;
return idx ;
}
}