// blob: 821689df998fc02a8288700eb8174a25d24c84fb [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.riot.checker ;
import java.util.Objects ;
import java.util.regex.Pattern ;
import org.apache.jena.JenaRuntime ;
import org.apache.jena.datatypes.RDFDatatype ;
import org.apache.jena.graph.Node ;
import org.apache.jena.riot.system.ErrorHandler ;
import org.apache.jena.sparql.graph.NodeConst ;
import org.apache.jena.util.SplitIRI;
/**
 * A {@link NodeChecker} for RDF literals.
 * <p>
 * A literal is checked for a consistent combination of language tag and datatype,
 * for a plausible language tag format, and (when it has a datatype and no language)
 * for a lexical form that the datatype accepts. Problems are reported to an
 * {@link ErrorHandler}; the checks return {@code false} when a problem was reported.
 */
public class CheckerLiterals implements NodeChecker {
    // A flag to enable the test suite to read bad data.
    // When false, the String-based checkLiteral accepts every literal without checking.
    public static boolean WarnOnBadLiterals = true ;

    /** Namespace of the XML Schema datatypes; used only to label datatypes in messages. */
    private static final String XSD_NS = "http://www.w3.org/2001/XMLSchema#" ;

    /** Approximate language tag syntax: 1-8 letters, then "-" separated subtags of 1-8 letters or digits. */
    private static final Pattern langPattern = Pattern.compile("[a-zA-Z]{1,8}(-[a-zA-Z0-9]{1,8})*") ;

    private final ErrorHandler handler ;

    /**
     * Create a checker that reports problems to the given handler.
     *
     * @param handler destination for errors and warnings
     */
    public CheckerLiterals(ErrorHandler handler) {
        this.handler = handler ;
    }

    /**
     * Check a node as a literal.
     *
     * @return {@code true} only if the node is a literal and it passes
     *         {@link #checkLiteral(Node, ErrorHandler, long, long)}; a node that is
     *         not a literal yields {@code false} without anything being reported.
     */
    @Override
    public boolean check(Node node, long line, long col) {
        return node.isLiteral() && checkLiteral(node, handler, line, col) ;
    }

    /**
     * Check a literal node, reporting an error if the node is not a literal at all.
     *
     * @param node    the node to check
     * @param handler destination for errors and warnings
     * @param line    line number passed through to the handler
     * @param col     column number passed through to the handler
     * @return {@code true} if no problem was reported
     */
    public static boolean checkLiteral(Node node, ErrorHandler handler, long line, long col) {
        if ( !node.isLiteral() ) {
            handler.error("Not a literal: " + node, line, col) ;
            return false ;
        }
        return checkLiteral(node.getLiteralLexicalForm(), node.getLiteralLanguage(), node.getLiteralDatatype(), handler, line, col) ;
    }

    /**
     * Check a literal given as a lexical form and a datatype, with no language tag.
     *
     * @return {@code true} if no problem was reported
     */
    public static boolean checkLiteral(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line, long col) {
        return checkLiteral(lexicalForm, null, datatype, handler, line, col) ;
    }

    /**
     * Check a literal given as a lexical form and a language tag, with no datatype.
     *
     * @return {@code true} if no problem was reported
     */
    public static boolean checkLiteral(String lexicalForm, String lang, ErrorHandler handler, long line, long col) {
        return checkLiteral(lexicalForm, lang, null, handler, line, col) ;
    }

    /**
     * Check a literal given as its parts.
     * <p>
     * If {@link #WarnOnBadLiterals} is {@code false}, nothing is checked and the result
     * is {@code true}.
     *
     * @param lexicalForm the lexical form of the literal
     * @param lang        the language tag; {@code null} or {@code ""} means "no language"
     * @param datatype    the datatype; may be {@code null}
     * @param handler     destination for errors and warnings
     * @param line        line number passed through to the handler
     * @param col         column number passed through to the handler
     * @return {@code true} if no problem was reported
     */
    public static boolean checkLiteral(String lexicalForm, String lang, RDFDatatype datatype, ErrorHandler handler,
                                       long line, long col) {
        if ( !WarnOnBadLiterals )
            return true ;

        boolean hasLang = lang != null && !lang.equals("") ;
        if ( !hasLang ) {
            // Datatype check (and RDF 1.0 simple literals are always well formed)
            if ( datatype == null )
                return true ;
            return validateByDatatype(lexicalForm, datatype, handler, line, col) ;
        }

        // Has a language.
        if ( datatype != null ) {
            if ( !JenaRuntime.isRDF11 ) {
                // Outside RDF 1.1 mode, a language tag and a datatype never go together.
                handler.error("Literal has datatype and language", line, col) ;
                return false ;
            }
            // In RDF 1.1 mode the only datatype accepted alongside a language tag is rdf:langString.
            if ( !Objects.equals(datatype.getURI(), NodeConst.rdfLangString.getURI()) ) {
                handler.error("Literal has language but wrong datatype", line, col) ;
                return false ;
            }
        }

        // Test language tag format -- not a perfect test.
        // lang is known to be non-null and non-empty here, so it can be matched directly.
        if ( !langPattern.matcher(lang).matches() ) {
            handler.warning("Language not valid: " + lang, line, col) ;
            return false ;
        }
        return true ;
    }

    // Whitespace.
    // XSD allows whitespace before and after the lexical forms of a literal but not inside.
    // Jena handles this correctly.

    /**
     * Validate a lexical form against a datatype.
     * Currently this only delegates to {@link #validateByDatatypeJena}; the strict
     * whitespace check is disabled.
     *
     * @return {@code true} if no problem was reported
     */
    protected static boolean validateByDatatype(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line, long col) {
        // if ( SysRIOT.StrictXSDLexicialForms )
        //     checkWhitespace(lexicalForm, datatype, handler, line, col);
        return validateByDatatypeJena(lexicalForm, datatype, handler, line, col) ;
    }

    /**
     * Validate a lexical form using {@link RDFDatatype#isValid(String)}, issuing a
     * warning if the datatype rejects it.
     *
     * @return {@code true} if the datatype accepts the lexical form
     */
    protected static boolean validateByDatatypeJena(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line, long col) {
        if ( datatype.isValid(lexicalForm) )
            return true ;
        handler.warning("Lexical form '" + lexicalForm + "' not valid for datatype " + xsdDatatypeName(datatype), line, col) ;
        return false ;
    }

    /**
     * Warn if the lexical form contains a space, a newline or a carriage return
     * anywhere. Only the first kind found is reported.
     * The only call to this method visible in this class is commented out.
     *
     * @return {@code true} if none of those characters is present
     */
    protected static boolean checkWhitespace(String lexicalForm, RDFDatatype datatype, ErrorHandler handler, long line, long col) {
        if ( lexicalForm.contains(" ") ) {
            handler.warning("Whitespace in "+xsdDatatypeName(datatype)+" literal: '" + lexicalForm + "'", line, col) ;
            return false ;
        }
        if ( lexicalForm.contains("\n") ) {
            handler.warning("Newline in "+xsdDatatypeName(datatype)+" literal: '" + lexicalForm + "'", line, col) ;
            return false ;
        }
        if ( lexicalForm.contains("\r") ) {
            handler.warning("Newline in "+xsdDatatypeName(datatype)+" literal: '" + lexicalForm + "'", line, col) ;
            return false ;
        }
        return true ;
    }

    /**
     * Produce a short label for a datatype for use in messages.
     * A datatype whose URI is in the XML Schema namespace is shown as
     * {@code "XSD <localname>"}; any other datatype is shown by its full URI in angle
     * brackets, so that it is not mislabelled as an XSD type.
     */
    private static String xsdDatatypeName(RDFDatatype datatype) {
        String uri = datatype.getURI() ;
        if ( uri != null && uri.startsWith(XSD_NS) )
            return "XSD "+SplitIRI.localname(uri) ;
        return "<" + uri + ">" ;
    }
}