blob: 5a5437e7654de679339cf12010cbf98d7cdbe859 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.riot.lang.rdfxml;
import java.io.IOException ;
import java.io.InputStream ;
import java.io.Reader ;
import java.util.Map;
import org.apache.jena.atlas.lib.Pair ;
import org.apache.jena.atlas.logging.Log;
import org.apache.jena.atlas.web.ContentType;
import org.apache.jena.datatypes.RDFDatatype ;
import org.apache.jena.datatypes.TypeMapper ;
import org.apache.jena.datatypes.xsd.impl.XMLLiteralType;
import org.apache.jena.graph.Node ;
import org.apache.jena.graph.Triple ;
import org.apache.jena.irix.IRIs;
import org.apache.jena.rdf.model.RDFErrorHandler ;
import org.apache.jena.rdfxml.xmlinput1.*;
import org.apache.jena.rdfxml.xmlinput1.impl.ARPSaxErrorHandler;
import org.apache.jena.riot.*;
import org.apache.jena.riot.system.ErrorHandler;
import org.apache.jena.riot.system.FactoryRDF;
import org.apache.jena.riot.system.ParserProfile;
import org.apache.jena.riot.system.StreamRDF;
import org.apache.jena.sparql.util.Context;
import org.xml.sax.SAXException ;
import org.xml.sax.SAXParseException ;
/** RDF/XML.
* <p>
* <b>LEGACY</b>
* <p>
* Uses xmlinput1 - the version of ARP from Jena 4.7.0 to Jena 4.10.0.
* <p>
* Replaced at Jena 5 by RRX.
*
* @see <a href="http://www.w3.org/TR/rdf-syntax-grammar/">http://www.w3.org/TR/rdf-syntax-grammar/</a>
*/
public class ReaderRDFXML_ARP1 implements ReaderRIOT
{
/**
 * Factory producing a new {@code ReaderRDFXML_ARP1} for each request.
 * <p>
 * The requested language is not consulted. The supplied {@link ParserProfile} is handed to
 * the new reader, which reads its error handler and term factory from it.
 * ARP predates RIOT and does many things internally already.
 */
public static ReaderRIOTFactory factory = (language, parserProfile) -> new ReaderRDFXML_ARP1(parserProfile);
// Profile supplied at construction; passed to the ARP handler (HandlerSink) and
// used to obtain the error handler for the legacy ARP0 reader.
private final ParserProfile parserProfile;
// Error handler read from the parser profile at construction time;
// used to report IOException/SAXException from the ARP load.
private final ErrorHandler errorHandler;
/**
 * Create a reader that works with the given parser profile.
 *
 * @param parserProfile profile providing the error handler; must not be null
 *                      because it is dereferenced here
 */
public ReaderRDFXML_ARP1(ParserProfile parserProfile) {
this.parserProfile = parserProfile;
this.errorHandler = parserProfile.getErrorHandler();
}
/**
 * Parse RDF/XML from a byte stream by delegating to {@code parse} with no {@link Reader}.
 *
 * @param in      byte source
 * @param baseURI passed to {@code parse} as the XML base
 * @param ct      content type, passed through unchanged
 * @param output  destination for the parser output
 * @param context parser settings, passed through unchanged
 */
@Override
public void read(InputStream in, String baseURI, ContentType ct, StreamRDF output, Context context) {
parse(in, null, baseURI, ct, output, context);
}
/**
 * Parse RDF/XML from a character stream by delegating to {@code parse} with no {@link InputStream}.
 *
 * @param reader  character source
 * @param baseURI passed to {@code parse} as the XML base
 * @param ct      content type, passed through unchanged
 * @param output  destination for the parser output
 * @param context parser settings, passed through unchanged
 */
@Override
public void read(Reader reader, String baseURI, ContentType ct, StreamRDF output, Context context) {
parse(null, reader, baseURI, ct, output, context);
}
// RDF 1.1 is based on URIs/IRIs, where spaces are not allowed.
// RDF 1.0 (and RDF/XML) was based on "RDF URI References", which did allow spaces.
// Use with TDB requires this to be "true".
// NOTE(review): the earlier comment said "it is set by InitTDB", but this field is
// final and fixed to true here, so it cannot be assigned elsewhere - confirm.
// When true, parse() upgrades the ARP warnings listed in additionalErrors to errors.
public static final boolean RiotUniformCompatibility = true ;
// Warnings in ARP that should be errors to be compatible with
// non-XML-based languages. e.g. language tags should be
// syntactically valid.
// The array reference is never reassigned, so it is declared final.
// The commented-out entries are deliberate: WARN_MALFORMED_URI is replaced by the
// space-in-URI check done in HandlerSink.
private static final int[] additionalErrors = new int[] {
    ARPErrorNumbers.WARN_MALFORMED_XMLLANG
    //, ARPErrorNumbers.WARN_MALFORMED_URI
    //, ARPErrorNumbers.WARN_STRING_NOT_NORMAL_FORM_C
} ;
// Special case of space in URI is handled in HandlerSink (below).
// This is instead of ARPErrorNumbers.WARN_MALFORMED_URI in additionalErrors[],
// which causes a WARN (from ARP, with line+column numbers) then an ERROR from RIOT.
// It's a pragmatic compromise.
// Never reassigned in this class, so it is declared final.
private static final boolean errorForSpaceInURI = true;
// Extracted from org.apache.jena.rdfxml.xmlinput.JenaReader
/**
 * Apply one entry of the RDF/XML reader properties to the ARP options.
 * <p>
 * Only names starting {@code ERR_}, {@code IGN_} or {@code WARN_} are treated as ARP
 * error conditions; any other property name is ignored without comment.
 *
 * @param options ARP options whose error mode is updated
 * @param pName   property name, resolved to an ARP condition code
 * @param value   the error mode: either a String starting {@code EM_} that names a mode,
 *                or an Integer equal to one of EM_IGNORE, EM_WARNING, EM_ERROR, EM_FATAL
 * @throws RiotException if the name is not a known ARP condition, or the value is null,
 *                       of an unsupported type, or not a legal error mode
 */
private void oneProperty(ARPOptions options, String pName, Object value) {
    boolean isErrorCondition =
        pName.startsWith("ERR_") || pName.startsWith("IGN_") || pName.startsWith("WARN_");
    if ( !isErrorCondition )
        return ;

    int cond = ParseException.errorCode(pName);
    if ( cond == -1 )
        throw new RiotException("No such ARP property: '"+pName+"'");

    int val;
    if ( value instanceof String ) {
        String modeName = (String) value;
        if ( !modeName.startsWith("EM_") )
            throw new RiotException("Value for ARP property does not start EM_: '"+pName+"' = '"+value+"'" );
        val = ParseException.errorCode(modeName);
        if ( val == -1 )
            throw new RiotException("Illegal value for ARP property: '"+pName+"' = '"+value+"'" );
    } else if ( value instanceof Integer ) {
        val = ((Integer) value).intValue();
        switch (val) {
            case ARPErrorNumbers.EM_IGNORE:
            case ARPErrorNumbers.EM_WARNING:
            case ARPErrorNumbers.EM_ERROR:
            case ARPErrorNumbers.EM_FATAL:
                break;
            default:
                throw new RiotException("Illegal value for ARP property: '"+pName+"' = '"+value+"'" );
        }
    } else {
        // Plain concatenation rather than value.toString(): a null value must produce
        // this RiotException, not a NullPointerException.
        throw new RiotException("Property \"" + pName + "\" cannot have value: " + value);
    }
    options.setErrorMode(cond, val);
}
/**
 * Parse RDF/XML with ARP and send the output to {@code sink}.
 * <p>
 * If the context sets {@code RIOT.symRDFXML0}, a once-only warning is logged and the whole
 * parse is delegated to {@link ReaderRDFXML_ARP0}; nothing else in this method runs.
 * <p>
 * {@code sink.finish()} is called after the load; it is not reached if an exception
 * propagates out of the load or out of the error handler.
 *
 * @param input   byte source; used only when {@code reader} is null
 * @param reader  character source; used in preference to {@code input} when non-null
 * @param xmlBase base URI handed to ARP; also used as the label in reported error messages
 * @param ct      content type; only forwarded to the legacy ARP0 reader
 * @param sink    destination for the parser output
 * @param context parser settings; may be null, meaning no legacy switch and no reader properties
 */
@SuppressWarnings({"deprecation", "removal"})
private void parse(InputStream input, Reader reader, String xmlBase, ContentType ct, StreamRDF sink, Context context) {
    // One of input and reader is null.
    // The context is null-checked here as well as below, so that a null context
    // does not fail on the legacy-switch test.
    boolean legacySwitch = context != null && context.isTrue(RIOT.symRDFXML0);
    if ( legacySwitch ) {
        Log.warnOnce(SysRIOT.getLogger(),
                     "Do not use rdfxml:rdfxml0 - use Lang RRX#RDFXML_ARP0 or \"--syntax arp0\"",
                     ReaderRDFXML_ARP0.class);
        ReaderRDFXML_ARP0 other = new ReaderRDFXML_ARP0(parserProfile.getErrorHandler());
        other.parse(input, reader, xmlBase, ct, sink, context);
        return;
    }

    // Hacked out of ARP because of all the "private" methods
    // JenaReader has reset the options since new ARP() was called.
    sink.start() ;
    // One object receives statements, errors and namespace events from ARP.
    HandlerSink rslt = new HandlerSink(sink, parserProfile) ;
    ARP arp = new ARP();
    arp.getHandlers().setStatementHandler(rslt) ;
    arp.getHandlers().setErrorHandler(rslt) ;
    arp.getHandlers().setNamespaceHandler(rslt) ;

    // ARPOptions.
    ARPOptions arpOptions = arp.getOptions() ;
    if ( RiotUniformCompatibility ) {
        // Convert some warnings to errors for compatible behaviour for all parsers.
        for ( int code : additionalErrors )
            arpOptions.setErrorMode(code, ARPErrorNumbers.EM_ERROR) ;
    }

    if ( context != null ) {
        Map<String, Object> properties = null;
        try {
            @SuppressWarnings("unchecked")
            Map<String, Object> p = (Map<String, Object>)(context.get(SysRIOT.sysRdfReaderProperties)) ;
            properties = p;
        } catch(Throwable ex) {
            // Best effort: a bad properties object is reported and then ignored.
            Log.warn(this, "Problem accessing the RDF/XML reader properties: properties ignored", ex);
        }
        // Applied after the compatibility settings, so the properties take precedence.
        if ( properties != null )
            properties.forEach((k,v) -> oneProperty(arpOptions, k, v)) ;
    }
    arp.setOptionsWith(arpOptions) ;

    String filename = xmlBase;
    try {
        if ( reader != null )
            arp.load(reader, xmlBase) ;
        else
            arp.load(input, xmlBase) ;
    }
    catch (IOException e) {
        errorHandler.error(filename + ": " + ParseException.formatMessage(e), -1, -1) ;
    }
    catch (SAXParseException e) {
        // already reported.
    }
    catch (SAXException sax) {
        errorHandler.error(filename + ": " + ParseException.formatMessage(sax), -1, -1) ;
    }
    sink.finish() ;
}
/**
 * Choose the base URI to use for RDF/XML parsing.
 *
 * @param baseIRI base supplied by the caller; may be null
 * @return {@code baseIRI} unchanged when it is non-null,
 *         otherwise the value of {@code IRIs.getBaseStr()}
 */
private static String baseURI_RDFXML(String baseIRI) {
    // A non-null base is used as given: RDFParserBuilder resolved the baseIRI.
    return ( baseIRI != null ) ? baseIRI : IRIs.getBaseStr();
}
/**
 * Receives ARP callbacks (statements, namespace events, and SAX errors via the superclass)
 * and forwards them to a {@link StreamRDF}.
 * <p>
 * ARP resources and literals are converted to {@link Node}s using the parser profile
 * and its term factory. No line/column information is available at this point,
 * so -1 is passed wherever a position is required.
 */
private static class HandlerSink extends ARPSaxErrorHandler implements StatementHandler, NamespaceHandler {
    private final StreamRDF output ;
    private final ParserProfile parserProfile;
    private final ErrorHandler riotErrorHandler;
    private final FactoryRDF termFactory;

    /**
     * @param output        destination for triples and prefixes
     * @param parserProfile source of the error handler and the term factory
     */
    HandlerSink(StreamRDF output, ParserProfile parserProfile) {
        super(new ErrorHandlerBridge(parserProfile.getErrorHandler())) ;
        this.output = output ;
        this.parserProfile = parserProfile;
        this.riotErrorHandler = parserProfile.getErrorHandler();
        this.termFactory = parserProfile.getFactorRDF();
    }

    /** ARP callback: a statement whose object is a resource. */
    @Override
    public void statement(AResource subj, AResource pred, AResource obj) {
        Triple triple = convert(subj, pred, obj);
        output.triple(triple);
    }

    /** ARP callback: a statement whose object is a literal. */
    @Override
    public void statement(AResource subj, AResource pred, ALiteral lit) {
        Triple triple = convert(subj, pred, lit);
        output.triple(triple);
    }

    /**
     * Convert an ARP literal to a node.
     * No datatype URI: a language literal. Otherwise a typed literal, using the
     * XML literal datatype when ARP reports well-formed XML, else the datatype
     * looked up by URI.
     */
    private Node convert(ALiteral lit) {
        String datatypeURI = lit.getDatatypeURI();
        if ( datatypeURI != null ) {
            RDFDatatype datatype = lit.isWellFormedXML()
                ? XMLLiteralType.theXMLLiteralType
                : TypeMapper.getInstance().getSafeTypeByName(datatypeURI);
            return parserProfile.createTypedLiteral(lit.toString(), datatype, -1, -1);
        }
        return parserProfile.createLangLiteral(lit.toString(), lit.getLang(), -1, -1);
    }

    /**
     * Convert an ARP resource to a node.
     * Anonymous resources become blank nodes; the node is stored as user data on the
     * ARP resource and reused when user data is already present.
     * Named resources become URI nodes, after the optional space-in-URI check.
     */
    private Node convert(AResource r) {
        if ( r.isAnonymous() ) {
            Node blank = (Node) r.getUserData();
            if ( blank == null ) {
                blank = termFactory.createBlankNode();
                r.setUserData(blank);
            }
            return blank;
        }

        String uriStr = r.getURI() ;
        if ( errorForSpaceInURI ) {
            // Special check for spaces in a URI: report it as an error, as the
            // text tokenizer does, and stop.
            int spaceAt = uriStr.indexOf(' ');
            if ( spaceAt >= 0 ) {
                String upToSpace = uriStr.substring(0, spaceAt);
                String msg = String.format("Bad character in IRI (space): <%s[space]...>", upToSpace);
                riotErrorHandler.error(msg, -1, -1);
                // Thrown in case the error handler returned instead of throwing.
                throw new RiotParseException(msg, -1, -1);
            }
        }
        return termFactory.createURI(uriStr);
    }

    /** Convert a resource-object statement; terms are converted in subject, predicate, object order. */
    private Triple convert(AResource s, AResource p, AResource o) {
        Node subject = convert(s);
        Node predicate = convert(p);
        Node object = convert(o);
        return Triple.create(subject, predicate, object) ;
    }

    /** Convert a literal-object statement; terms are converted in subject, predicate, object order. */
    private Triple convert(AResource s, AResource p, ALiteral o) {
        Node subject = convert(s);
        Node predicate = convert(p);
        Node object = convert(o);
        return Triple.create(subject, predicate, object) ;
    }

    /** ARP callback: a namespace declaration, forwarded as a prefix. */
    @Override
    public void startPrefixMapping(String prefix, String uri) {
        output.prefix(prefix, uri) ;
    }

    /** ARP callback: end of a namespace scope; nothing to forward. */
    @Override
    public void endPrefixMapping(String prefix) {}
}
/**
 * Adapts a RIOT {@link ErrorHandler} to the {@link RDFErrorHandler} interface:
 * each callback forwards the exception message together with the line and column
 * taken from the exception when it is a {@link SAXParseException}, or -1/-1 otherwise.
 */
private static class ErrorHandlerBridge implements RDFErrorHandler {
    private ErrorHandler errorHandler ;

    /** @param handler the RIOT error handler that receives every report */
    ErrorHandlerBridge(ErrorHandler handler) {
        this.errorHandler = handler ;
    }

    /** Forward as a RIOT warning. */
    @Override
    public void warning(Exception e) {
        Pair<Integer, Integer> position = getLineCol(e) ;
        errorHandler.warning(e.getMessage(), position.getLeft(), position.getRight()) ;
    }

    /** Forward as a RIOT error. */
    @Override
    public void error(Exception e) {
        Pair<Integer, Integer> position = getLineCol(e) ;
        errorHandler.error(e.getMessage(), position.getLeft(), position.getRight()) ;
    }

    /** Forward as a RIOT fatal error. */
    @Override
    public void fatalError(Exception e) {
        Pair<Integer, Integer> position = getLineCol(e) ;
        errorHandler.fatal(e.getMessage(), position.getLeft(), position.getRight()) ;
    }

    /**
     * Extract the (line, column) pair from an exception.
     *
     * @return the SAX position for a {@link SAXParseException}, otherwise (-1, -1)
     */
    private static Pair<Integer, Integer> getLineCol(Exception e) {
        if ( !(e instanceof SAXParseException) )
            return Pair.create(-1, -1) ;
        SAXParseException located = (SAXParseException) e ;
        return Pair.create(located.getLineNumber(), located.getColumnNumber()) ;
    }
}
}