blob: 20328c4a5866a7ffb49c8a24d2870123ceb91c5e [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jena.riot.lang.rdfxml.rrx;
import static org.apache.jena.riot.SysRIOT.fmtMessage;
import java.io.IOException;
import java.util.*;
import javax.xml.XMLConstants;
import javax.xml.namespace.NamespaceContext;
import javax.xml.namespace.QName;
import org.apache.jena.atlas.io.IndentedWriter;
import org.apache.jena.datatypes.RDFDatatype;
import org.apache.jena.datatypes.xsd.impl.XMLLiteralType;
import org.apache.jena.graph.Node;
import org.apache.jena.graph.NodeFactory;
import org.apache.jena.graph.Triple;
import org.apache.jena.irix.IRIException;
import org.apache.jena.irix.IRIs;
import org.apache.jena.irix.IRIx;
import org.apache.jena.riot.RiotException;
import org.apache.jena.riot.lang.rdfxml.RDFXMLParseException;
import org.apache.jena.riot.out.NodeFmtLib;
import org.apache.jena.riot.system.FactoryRDF;
import org.apache.jena.riot.system.ParserProfile;
import org.apache.jena.riot.system.StreamRDF;
import org.apache.jena.sparql.util.Context;
import org.apache.jena.util.XML11Char;
import org.apache.jena.vocabulary.RDF;
import org.apache.jena.vocabulary.RDF.Nodes;
import org.xml.sax.*;
import org.xml.sax.ext.DeclHandler;
import org.xml.sax.ext.EntityResolver2;
import org.xml.sax.ext.LexicalHandler;
public class ParserRDFXML_SAX
implements
ContentHandler,
ErrorHandler,
EntityResolver,
DTDHandler,
LexicalHandler,
DeclHandler,
EntityResolver2 {
// Master switch for parser tracing. It is read in the constructor to decide
// whether a trace writer is created, and guards every trace call.
public static boolean TRACE = false;
// Extra-detail tracing (e.g. element depth changes); only consulted together with TRACE.
private static boolean VERBOSE = false;
// Additional tracing for SAX events we don't care about.
private static boolean EVENTS = false;
// Trace output for parser actions; null when TRACE was false at construction time.
private final IndentedWriter trace;
// Trace output for XML-level events; the constructor assigns it the same writer as 'trace'.
private final IndentedWriter traceXML;
// ---- Constants.
// Namespace URIs used when matching element and attribute names.
private static final String rdfNS = RDF.uri;
private static final String xmlNS = XMLConstants.XML_NS_URI;
// QName local names.
// These are local names within the RDF namespace (rdfNS); they are compared
// against the local name reported by SAX, never against a prefixed name.
private static final String rdfRDF = "RDF";
private static final String rdfDescription = "Description";
private static final String rdfID = "ID";
private static final String rdfNodeID = "nodeID";
private static final String rdfAbout = "about";
private static final String rdfType = "type";
private static final String rdfSeq = "Seq";
private static final String rdfBag = "Bag";
private static final String rdfAlt = "Alt";
private static final String rdfDatatype = "datatype";
private static final String rdfParseType = "parseType";
private static final String rdfResource = "resource";
private static final String rdfContainerItem = "li";
// The three "old terms" of the RDF/XML grammar (see production oldTerms below).
private static final String rdfAboutEach = "aboutEach";
private static final String rdfAboutEachPrefix = "aboutEachPrefix";
private static final String rdfBagID = "bagID";
private static final RDFDatatype rdfXmlLiteralDT = XMLLiteralType.theXMLLiteralType;
// LN = Local name.
private static final String xmlBaseLN = "base";
private static final String xmlLangLN = "lang";
// xml:space is a now-deprecated XML attribute that relates to the handling of
// whitespace characters inside elements.
private static final String xmlSpaceLN = "space";
// Grammar productions.
// Each set holds local names; membership is only meaningful for names in the RDF namespace.
// 6.2.2 Production coreSyntaxTerms
// rdf:RDF | rdf:ID | rdf:about | rdf:parseType | rdf:resource | rdf:nodeID | rdf:datatype
private static Set<String> $coreSyntaxTerms =
Set.of(rdfRDF, rdfID, rdfAbout, rdfParseType, rdfResource, rdfNodeID, rdfDatatype);
// 6.2.3 Production syntaxTerms
// coreSyntaxTerms | rdf:Description | rdf:li
private static Set<String> $syntaxTerms =
Set.of(rdfRDF, rdfID, rdfAbout, rdfParseType, rdfResource, rdfNodeID, rdfDatatype,
rdfDescription, rdfContainerItem);
// 6.2.4 Production oldTerms
// rdf:aboutEach | rdf:aboutEachPrefix | rdf:bagID
private static Set<String> $oldTerms = Set.of(rdfAboutEach, rdfAboutEachPrefix, rdfBagID);
// Union of syntaxTerms and oldTerms. The three trailing string literals have the
// same values as rdfAboutEach, rdfAboutEachPrefix and rdfBagID.
private static Set<String> disallowedPropertyAttributeTerms =
Set.of(rdfRDF, rdfID, rdfAbout, rdfParseType, rdfResource, rdfNodeID, rdfDatatype,
rdfDescription, rdfContainerItem, "aboutEach", "aboutEachPrefix", "bagID");
// 6.2.5 Production nodeElementURIs
// anyURI - ( coreSyntaxTerms | rdf:li | oldTerms )
/**
 * Test whether a name may be used as a node element tag.
 * Names outside the RDF namespace are always allowed; within the RDF namespace
 * the core syntax terms, rdf:li and the old terms are excluded.
 */
private static boolean allowedNodeElementURIs(String namespace, String localName) {
    // Only names in the RDF namespace can be excluded.
    if ( ! rdfNS.equals(namespace) )
        return true;
    boolean excluded = $coreSyntaxTerms.contains(localName)
                       || rdfContainerItem.equals(localName)
                       || $oldTerms.contains(localName);
    return ! excluded;
}
// 6.2.6 Production propertyElementURIs
// anyURI - ( coreSyntaxTerms | rdf:Description | oldTerms )
/**
 * Test whether a name may be used as a property element tag.
 * Names outside the RDF namespace are always allowed; within the RDF namespace
 * the core syntax terms, rdf:Description and the old terms are excluded.
 */
private static boolean allowedPropertyElementURIs(String namespace, String localName) {
    // Only names in the RDF namespace can be excluded.
    if ( ! rdfNS.equals(namespace) )
        return true;
    boolean excluded = $coreSyntaxTerms.contains(localName)
                       || rdfDescription.equals(localName)
                       || $oldTerms.contains(localName);
    return ! excluded;
}
// 6.2.7 Production propertyAttributeURIs
// anyURI - ( coreSyntaxTerms | rdf:Description | rdf:li | oldTerms )
/**
 * Test whether a name may be used as a property attribute.
 * Names outside the RDF namespace are always allowed; within the RDF namespace
 * the core syntax terms, rdf:Description, rdf:li and the old terms are excluded.
 */
private static boolean allowedPropertyAttributeURIs(String namespace, String localName) {
    // Only names in the RDF namespace can be excluded.
    if ( ! rdfNS.equals(namespace) )
        return true;
    boolean excluded = $coreSyntaxTerms.contains(localName)
                       || rdfDescription.equals(localName)
                       || rdfContainerItem.equals(localName)
                       || $oldTerms.contains(localName);
    return ! excluded;
}
/**
 * The attributes that guide the RDF/XML parser.
 * Holds local names only; {@code isSyntaxAttribute} checks the RDF namespace
 * before consulting this set.
 */
private static Set<String> $rdfSyntaxAttributes =
Set.of(rdfRDF, rdfAbout, rdfNodeID, rdfID, rdfParseType, rdfDatatype, rdfResource);
/** Return true if the name is in the RDF namespace and is one of the RDF/XML syntax attributes. */
private boolean isSyntaxAttribute(String namespace, String localName) {
    return rdfNS.equals(namespace) && $rdfSyntaxAttributes.contains(localName);
}
// Local names in the XML namespace that the parser recognizes: xml:base, xml:lang, xml:space.
private static Set<String> $xmlReservedTerms = Set.of(xmlBaseLN, xmlLangLN, xmlSpaceLN);
/** Recognized XML namespace Qname */
private static boolean isXMLQName(String namespace, String localName) {
    // Must be in the XML namespace and be one of the recognized local names.
    return isXMLNamespace(namespace) && $xmlReservedTerms.contains(localName);
}
/** Return true if the namespace is the XML namespace; a null namespace is not. */
private static boolean isXMLNamespace(String namespace) {
    boolean inXmlNamespace = xmlNS.equals(namespace);
    return inXmlNamespace;
}
/**
 * Return true if the qname is an XML namespace declaration:
 * either exactly "xmlns" or a name starting with "xmlns:".
 * A null qname is not a namespace declaration.
 */
private static boolean isXMLNamespaceQName(String qName) {
    if ( qName == null )
        return false;
    return qName.equals("xmlns") || qName.startsWith("xmlns:");
}
// ---- Parser internal
/** A location in the input. A negative line or column means "not known". */
private record Position(int line, int column) {}

/**
 * Format a position for messages and tracing.
 * <ul>
 * <li>{@code null} gives {@code "[-,-]"}</li>
 * <li>both line and column unknown (negative) gives {@code "[?,?]"}</li>
 * <li>only the column unknown gives {@code "[Line: n, -]"}</li>
 * <li>only the line unknown gives {@code "[-, Col: n]"}</li>
 * <li>otherwise {@code "[Line: n, Col: m]"}</li>
 * </ul>
 */
private static String str(Position position) {
    if ( position == null )
        return "[-,-]";
    boolean lineKnown = position.line() >= 0;
    boolean columnKnown = position.column() >= 0;
    if ( ! lineKnown && ! columnKnown )
        return "[?,?]";
    // Print the value that is known, under its own label.
    if ( ! columnKnown )
        return String.format("[Line: %d, -]", position.line());
    if ( ! lineKnown )
        return String.format("[-, Col: %d]", position.column());
    return String.format("[Line: %d, Col: %d]", position.line(), position.column());
}
// ---- Parser internal
// Single element node is "(!hasRDF && parserMode=NodeElement)"
/**
 * ParserMode. This directs the code at startElement and determines how to
 * complete at endElement.
 */
private enum ParserMode {
// The first startElement is rdf:RDF, with multiple children,
// or it is a single NodeElement.
TOP,
// Top level element, parseType=resource (implicit) or a nested startElement
// inside a property.
NodeElement,
// Looking for the start that sets "property" and the parse type.
// If the immediately next tag is a start, it's a property.
// If at the end of a property-value, an immediately starting tag is another
// property-value.
PropertyElement,
// Within a property, gathering the lexical form for the object.
ObjectLex,
// The node implied by rdf:parseType=Resource
ObjectParserTypeResource,
// The object is rdf:parseType=Literal. Collecting characters of a RDF XML Literal
ObjectParseTypeLiteral,
// The object is rdf:parseType=Collection (RDF List)
ObjectParseTypeCollection
}
/**
 * Integer holder for rdf:li. Starts at 1; startPropertyElement reads and
 * post-increments it to build the rdf:_N property for each rdf:li.
 */
private static class Counter { int value = 1; }
/**
 * Node holder for collection items. Holds the node for the last item added in the collection at this level.
 * The node is null until the first item of the collection has been started.
 */
private static class NodeHolder { Node node = null; }
/** rdf:parseType for objects, with a default "Plain" case used when no rdf:parseType is given. */
private enum ObjectParseType { Literal, Collection, Resource,
// This is an extra parseType to indicate the "no ParseType" case
// which is a plain lexical or nested resource.
Plain }
// ---- Parser output
// Callback used to output one triple (subject, property, object) together with
// the input position it came from. The parser keeps the active one in
// 'currentEmitter'; startPropertyElement sets it from maybeReifyStatement.
interface Emitter { void emit(Node subject, Node property, Node object, Position position); }
// ---- Parser state
// Snapshot of the nesting-sensitive parser state. pushParserFrame captures the
// current values and popParserFrame restores them.
private record ParserFrame(IRIx base, String lang,
Node subject, Node property,
Counter containerPropertyCounter,
NodeHolder collectionNode,
Emitter emitter,
ParserMode parserMode
) {}
// Stack of saved frames; push/pop operate on the head of the deque.
private Deque<ParserFrame> parserStack = new ArrayDeque<>();
// Normal case
/** Save the current parser state, recording the current parser mode in the frame. */
private void pushParserFrame() {
    pushParserFrame(this.parserMode);
}
// Called directly when ObjectLex turns out to be a resource object after all.
/**
 * Save the current parser state on the stack.
 *
 * @param frameParserMode the parser mode to record in the frame; this is the mode
 *        restored by the matching popParserFrame, and may differ from the current mode.
 */
private void pushParserFrame(ParserMode frameParserMode) {
    if ( TRACE ) {
        trace.printf("Push frame: S: %s P: %s -- mode=%s\n",
                     str(currentSubject), str(currentProperty), frameParserMode);
    }
    parserStack.push(new ParserFrame(currentBase,
                                     currentLang,
                                     currentSubject,
                                     currentProperty,
                                     containerPropertyCounter,
                                     collectionNode,
                                     currentEmitter,
                                     frameParserMode));
}
/**
 * Restore the parser state from the frame on top of the stack.
 * If the restored mode is ParserMode.ObjectParserTypeResource, the frame was the
 * implicit one pushed for rdf:parseType="Resource", so the next frame is popped as well.
 */
private void popParserFrame() {
ParserFrame frame = parserStack.pop();
if ( TRACE ) {
trace.printf("Pop frame: S: %s -> %s : P: %s -> %s\n", str(currentSubject), frame.subject,
str(currentProperty), frame.property);
}
// Restore every field captured by pushParserFrame.
this.currentBase = frame.base;
this.currentLang = frame.lang;
this.currentSubject = frame.subject;
this.currentProperty = frame.property;
this.currentEmitter = frame.emitter;
this.collectionNode = frame.collectionNode;
this.containerPropertyCounter = frame.containerPropertyCounter;
this.parserMode = frame.parserMode;
// If this frame is ParserMode.ObjectParserTypeResource, then it is an implicit frame
// inserted for the implied node. Pop the stack again to balance the push of
// the implicit node element.
if ( parserMode == ParserMode.ObjectParserTypeResource ) {
popParserFrame();
decIndent();
}
}
/** Display string for a node, for tracing; a null node is shown as "null". */
private static String str(Node node) {
    return (node == null) ? "null" : NodeFmtLib.displayStr(node);
}
// ---- Error handlers
/**
 * Report a parse error to the error handler and build the exception for the caller to throw.
 * <p>
 * The error handler may itself throw; if it returns, the exception returned here
 * ensures that parsing still stops, because RDF/XML parsing cannot continue after an error.
 *
 * @param message  the error message
 * @param position the input position, or null if unknown (reported as line -1, column -1)
 * @return an exception carrying the formatted message, for the caller to throw
 */
private RiotException RDFXMLparseError(String message, Position position) {
    // Resolve the location once so that a null position is safe for both uses below.
    int line = (position != null) ? position.line() : -1;
    int column = (position != null) ? position.column() : -1;
    errorHandler.error(message, line, column);
    // The error handler normally does this but for RDF/XML parsing it is required.
    return new RiotException(fmtMessage(message, line, column));
}
/**
 * Send a warning to the error handler.
 * A null position is reported as line -1, column -1.
 */
private void RDFXMLparseWarning(String message, Position position) {
    int line = -1;
    int column = -1;
    if ( position != null ) {
        line = position.line();
        column = position.column();
    }
    errorHandler.warning(message, line, column);
}
// ---- Parser Setup
// All of these are assigned once, in the constructor.
private final ParserProfile parserProfile;
private final FactoryRDF factory;
private final Context context;
private final org.apache.jena.riot.system.ErrorHandler errorHandler;
// The base URI string passed to the constructor; may be null.
private final String initialXmlBase;
// The constructor sets this to the empty string.
private final String initialXmlLang;
private final StreamRDF destination;
// Tracking for ID on nodes (not reification usage)
// We limit the number of local fragment IDs tracked because the map only grows.
// A base URI may be re-introduced so this isn't nested scoping.
private int countTrackingIDs = 0;
// Base IRI -> (ID string -> position where that ID was first recorded).
private Map<IRIx, Map<String, Position>> trackUsedIDs = new HashMap<>();
/**
 * Look up an earlier use of an ID under the current base, recording this use if there was none.
 *
 * @param idStr    the ID string
 * @param position where the ID is being used now
 * @return the position of the earlier recorded use, or null if none is recorded.
 *         Once more than 10000 IDs have been recorded, new IDs are no longer
 *         added, so later duplicates of those IDs are not detected.
 */
private Position previousUseOfID(String idStr, Position position) {
    Map<String, Position> idsForBase = trackUsedIDs.computeIfAbsent(currentBase, base -> new HashMap<>());
    Position earlier = idsForBase.get(idStr);
    if ( earlier != null )
        return earlier;
    // First sighting: remember it unless the tracking limit has been passed.
    if ( countTrackingIDs <= 10000 ) {
        idsForBase.put(idStr, position);
        countTrackingIDs++;
    }
    return null;
}
// -- The XML state
// Source of line/column information; null until one is supplied.
private Locator locator = null;
// ---- Parser state
// Structure.
// Set once an rdf:RDF element has been processed; used to reject a nested rdf:RDF.
private boolean hasRDF = false;
// Set in startDocument.
private boolean hasDocument = false;
// Not needed on the stack because it is only used for non-nesting object lexical.
// Set from the rdf:datatype attribute in startPropertyElement; null if absent.
private RDFDatatype datatype;
// Collecting characters does not need to be a stack because there are
// no nested objects while gathering characters for lexical or XMLLiterals.
private StringBuilder accCharacters = new StringBuilder(100);
// Element depth is incremented at the end of "startElement" and decremented at
// the beginning of "endElement". Used for collecting XML Literals.
// NOTE(review): in the visible endElement the decrement happens at the end of
// the method, after the frame is popped - confirm which description is intended.
private int elementDepth = 0;
/** Go one element deeper, tracing the change when verbose tracing is on. */
private void incElementDepth() {
    int deeper = elementDepth + 1;
    if ( TRACE && VERBOSE )
        trace.printf("~~ incElementDepth %d -> %d\n", elementDepth, deeper);
    elementDepth = deeper;
}
/** Come up one element level, tracing the change when verbose tracing is on. */
private void decElementDepth() {
    int shallower = elementDepth - 1;
    if ( TRACE && VERBOSE )
        trace.printf("~~ decElementDepth %d -> %d\n", elementDepth, shallower);
    elementDepth = shallower;
}
// Level at which we started collecting an XML Literal.
private int xmlLiteralStartDepth = -1;
// Parser stack frame items.
// These are the values saved by pushParserFrame and restored by popParserFrame.
private IRIx currentBase;
private String currentLang = null;
private Node currentSubject = null;
private Node currentProperty = null;
private Counter containerPropertyCounter = null; // For rdf:li
private NodeHolder collectionNode = null; // For parseType=Collection
private Emitter currentEmitter = null;
// Parsing begins in TOP mode, expecting rdf:RDF or a single node element.
private ParserMode parserMode = ParserMode.TOP;
/** Switch the parser to a new mode. */
private void parserMode(ParserMode newMode) {
    parserMode = newMode;
}
// // Forming objects.
// private ParseType parseType = null;
/**
 * Create a SAX handler that parses RDF/XML and sends the output to a StreamRDF.
 *
 * @param xmlBase       the initial base URI; may be null, in which case there is no
 *                      initial base. When not null it is also set as the base IRI
 *                      of the parser profile.
 * @param parserProfile supplies the RDF factory and the error handler
 * @param destination   where parser output is sent
 * @param context       stored for later use by the parser
 */
public ParserRDFXML_SAX(String xmlBase, ParserProfile parserProfile, StreamRDF destination, Context context) {
// Debug
// The trace writer is only created if TRACE is already set when the parser is constructed.
if ( TRACE ) {
IndentedWriter out1 = IndentedWriter.stdout.clone();
out1.setFlushOnNewline(true);
out1.setUnitIndent(4);
out1.setLinePrefix("# ");
this.trace = out1;
//IndentedWriter out2 = IndentedWriter.stdout.clone().setFlushOnNewline(true).setUnitIndent(4).setLinePrefix("! ");
} else {
this.trace = null;
}
this.traceXML = this.trace;
// NOTE(review): this writes a static flag from an instance constructor.
EVENTS = TRACE;
// Debug
this.parserProfile = parserProfile;
this.factory = parserProfile.getFactorRDF();
this.errorHandler = parserProfile.getErrorHandler();
this.context = context;
this.initialXmlBase = xmlBase;
this.initialXmlLang = "";
if ( xmlBase != null ) {
this.currentBase = IRIx.create(xmlBase);
parserProfile.setBaseIRI(currentBase.str());
} else {
this.currentBase = null;
}
// The initial language is the empty string.
this.currentLang = "";
this.destination = destination;
}
// ---- ContentHandler
/** SAX: start of the document. Records that a document was seen. */
@Override
public void startDocument() throws SAXException {
    if ( TRACE ) {
        traceXML.println("Doc start");
    }
    this.hasDocument = true;
}
/** SAX: end of the document. Only traces; there is no other work to do. */
@Override
public void endDocument() throws SAXException {
    if ( ! TRACE )
        return;
    traceXML.println("Doc end");
}
/**
 * SAX: start of an element.
 * <p>
 * While an XML literal is being collected, the element is passed to the XML
 * literal collector and nothing else happens. Otherwise a parser frame is pushed,
 * xml:base and xml:lang are processed, and the element is dispatched on the
 * current parser mode.
 * <p>
 * The element depth is incremented at the end, except for rdf:RDF in TOP mode,
 * which returns before the increment.
 */
@Override
public void startElement(final String namespaceURI, final String localName, String qName, Attributes attributes) {
if ( xmlLiteralCollecting() ) {
if ( TRACE )
trace.printf("startElement: XML Literal[%s]: depth = %d\n", qName, elementDepth);
xmlLiteralCollectStartElement(namespaceURI, localName, qName, attributes);
return;
}
if ( TRACE ) {
trace.printf("%s StartElement(%s", here(), qName);
for ( int i = 0 ; i < attributes.getLength() ; i++ ) {
String x = attributes.getQName(i);
// NOTE(review): 'v' is not used; the printf below fetches the value again.
String v = attributes.getValue(i);
trace.printf(", %s=%s", x, attributes.getValue(i));
}
trace.printf(") mode = %s\n", parserMode);
}
incIndent();
Position position = position();
if ( TRACE )
trace.printf("StartElement parserMode=%s\n", parserMode);
// Special case.
// Gathering characters for an object lexical form, then encountering a start element
// which is a resource. This is the only case of lookahead in the RDF/XML grammar.
switch (parserMode) {
case ObjectLex:
// While processing ObjectLex, we found a startElement.
// The "ObjectLex" decision needs updating. This is a ParserMode.NodeElement.
// This is not parseType=Resource.
// Only whitespace may precede the nested element.
if ( !isWhitespace(accCharacters) )
throw RDFXMLparseError("XML content before nested element", position);
accCharacters.setLength(0);
// Declare that the containing frame is expecting a node element mode.
// Leave in parserMode=ObjectLex
pushParserFrame(ParserMode.NodeElement);
processBaseAndLang(attributes, position);
break;
default:
// For everything else.
pushParserFrame();
processBaseAndLang(attributes, position);
}
// Dispatch on the mode. In the ObjectLex case the mode is still ObjectLex here;
// only the pushed frame records NodeElement.
switch (parserMode) {
case TOP:
// Document element: Either a one element fragment or rdf:RDF
// rdf:RDF => nodeElementList
// nodeElementList = ws* (nodeElement ws* )* or nodeElement
if ( qNameMatches(rdfNS, rdfRDF, namespaceURI, localName) ) {
// Emits declarations.
// NOTE(review): processBaseAndLang was already called for this element in the switch above.
processBaseAndLang(attributes, position);
rdfRDF(namespaceURI, localName, qName, attributes, position);
// Returns without incrementing the element depth.
return;
}
// The top element can be a single nodeElement.
startNodeElement(namespaceURI, localName, qName, attributes, position);
break;
case NodeElement:
startNodeElement(namespaceURI, localName, qName, attributes, position);
break;
case PropertyElement:
startPropertyElement(namespaceURI, localName, qName, attributes, position);
break;
case ObjectLex:
// Finish ObjectLex. Generate the triple.
Node innerSubject = attributesToSubjectNode(attributes, position);
currentEmitter.emit(currentSubject, currentProperty, innerSubject, position);
// This is an rdf:Description or a typed node element.
startNodeElementWithSubject(innerSubject, namespaceURI, localName, qName, attributes, position);
break;
case ObjectParseTypeLiteral:
// Handled on entry.
throw RDFXMLparseError("Unexpected parserMode " + parserMode, position);
case ObjectParseTypeCollection:
startCollectionItem(namespaceURI, localName, qName, attributes, position);
break;
default:
break;
}
incElementDepth();
}
/**
 * SAX: end of an element.
 * <p>
 * The end of rdf:RDF resets the mode to TOP and returns immediately. While an XML
 * literal is being collected, inner end tags are passed to the collector; the end
 * tag that closes the literal finishes it and then continues as a normal end tag.
 * Otherwise the end handler for the current parser mode runs, the parser frame is
 * popped and the element depth is decremented.
 */
@Override
public void endElement(String namespaceURI, String localName, String qName) {
if ( qNameMatches(rdfNS, rdfRDF, namespaceURI, localName) ) {
// NOTE(review): returns without popParserFrame() or decElementDepth();
// startElement likewise skips incElementDepth() for rdf:RDF, but it does push a frame.
decIndent();
parserMode(ParserMode.TOP);
return;
}
if ( TRACE ) {
decIndent();
trace.printf("%s enter endElement(%s) mode = %s\n", here(), qName, parserMode);
incIndent();
}
Position position = position();
if ( xmlLiteralCollecting() ) {
if ( TRACE )
trace.printf("Collecting: elementDepth=%d / xmlLiteralStartDepth=%s\n", elementDepth, xmlLiteralStartDepth);
// Still inside the literal: this end tag belongs to the XML literal content.
if ( elementDepth-1 > xmlLiteralStartDepth ) {
if ( TRACE )
trace.print("Continue collecting\n");
xmlLiteralCollectEndElement(namespaceURI, localName, qName);
return;
}
endXMLLiteral(position);
if ( TRACE )
trace.printf("**** End XML Literal[%s]: elementDepth=%d / xmlLiteralStartDepth=%s\n", qName, elementDepth, xmlLiteralStartDepth);
// Keep going to finish the end tag.
}
switch (parserMode) {
case NodeElement:
endNodeElement(position);
break;
case PropertyElement:
if ( isEndNodeElement() )
// Possible next property but it's a node element so no property
// and it's end of node, with two "end property" tags seen in a row.
endNodeElement(position);
else
endPropertyElement(position);
// How to tell this is end of the properties?
break;
case ObjectLex:
endObjectLexical(position);
break;
case ObjectParseTypeLiteral:
endObjectXMLLiteral(position);
break;
case ObjectParseTypeCollection:
endCollectionItem(position);
break;
default:
throw RDFXMLparseError("Inconsistent parserMode:" + parserMode, position);
}
// Restore the state saved by the matching startElement.
popParserFrame();
decIndent();
decElementDepth();
if ( TRACE )
trace.printf("%s EndElement(%s) mode = %s\n", here(), qName, parserMode);
}
/**
 * Process the rdf:RDF element: check it is the only one and is at the top level,
 * emit the base (if xml:base is present) and the namespace prefix declarations,
 * then switch to expecting node elements.
 */
private void rdfRDF(String namespaceURI, String localName, String qName, Attributes attributes, Position position) {
    if ( TRACE )
        trace.println("rdf:RDF");
    if ( hasRDF )
        throw RDFXMLparseError("Nested rdf:RDF", position);
    if ( elementDepth != 0 )
        throw RDFXMLparseError("rdf:RDF not at top level", position);

    String xmlBaseURI = attributes.getValue(xmlNS, xmlBaseLN);
    if ( xmlBaseURI != null ) {
        emitBase(xmlBaseURI, position);
        currentBase = resolveIRIx(xmlBaseURI, position);
    }

    // Namespace declarations: "xmlns" is the default namespace (empty prefix),
    // "xmlns:p" declares prefix "p". Any other attribute is not a declaration.
    final String nsDeclStart = "xmlns:";
    for ( int idx = 0 ; idx < attributes.getLength() ; idx++ ) {
        String attrQName = attributes.getQName(idx);
        if ( attrQName.equals("xmlns") ) {
            emitPrefix("", attributes.getValue(idx), position);
        } else if ( attrQName.startsWith(nsDeclStart) ) {
            String declaredPrefix = attrQName.substring(nsDeclStart.length());
            emitPrefix(declaredPrefix, attributes.getValue(idx), position);
        }
    }

    hasRDF = true;
    parserMode(ParserMode.NodeElement);
}
/* ++ nodeElement
*
* start-element(URI == nodeElementURIs attributes == set((idAttr | nodeIdAttr |
* aboutAttr )?, propertyAttr*)) propertyEltList end-element()
*
* ++ nodeElementURIs anyURI - ( coreSyntaxTerms | rdf:li | oldTerms ) */
/** Start a node element: determine its subject from the attributes, then process the element. */
private void startNodeElement(String namespaceURI, String localName, String qName, Attributes attributes, Position position) {
    // Top level object - maybe inside rdf:RDF
    startNodeElementWithSubject(attributesToSubjectNode(attributes, position),
                                namespaceURI, localName, qName, attributes, position);
}
// Subject already determined - e.g. needed for inner resource.
/**
 * Start a node element whose subject is already known.
 * Sets the current subject, starts a fresh rdf:li counter, emits an rdf:type
 * triple for a typed node element (any tag other than rdf:Description),
 * processes property attributes and then expects property elements.
 */
private void startNodeElementWithSubject(Node thisSubject,
                                         String namespaceURI, String localName, String qName, Attributes attributes,
                                         Position position) {
    if ( TRACE )
        trace.printf("Start nodeElement: subject = %s\n", str(thisSubject));
    currentSubject = thisSubject;
    containerPropertyCounter = new Counter();

    // The tag name must be one permitted for node elements.
    if ( ! allowedNodeElementURIs(namespaceURI, localName) )
        throw RDFXMLparseError("Not allowed as a node element tag: '"+qName+"'", position);

    boolean isDescription = qNameMatches(rdfNS, rdfDescription, namespaceURI, localName);
    if ( ! isDescription ) {
        // Typed node element: the tag names the type.
        if ( isMemberProperty(namespaceURI, localName) )
            RDFXMLparseWarning(qName+" is being used on a typed node", position);
        else if ( isNotRecognizedRDFtype(namespaceURI, localName) )
            RDFXMLparseWarning(qName+" is not a recognized RDF term for a type", position);
        Node typeNode = qNameToIRI(namespaceURI, localName, position);
        emit(currentSubject, RDF.Nodes.type, typeNode, position);
    }

    if ( hasPropertyAttributes(attributes, position) )
        processPropertyAttributes(currentSubject, attributes, position);

    parserMode(ParserMode.PropertyElement);
}
/** End of a node element. Only traces; there is no other work to do. */
private void endNodeElement(Position position) {
    if ( ! TRACE )
        return;
    trace.println("endNodeElement. ParserMode = "+parserMode);
}
/**
 * Start a property element.
 * <p>
 * Determines the property (rdf:li becomes rdf:_N using the container counter),
 * records any rdf:datatype, sets the emitter from maybeReifyStatement, and then
 * decides the kind of object:
 * <ul>
 * <li>property attributes present: the object is the rdf:resource / rdf:nodeID
 * node, or a fresh blank node, and the attributes are properties of it;</li>
 * <li>rdf:resource or rdf:nodeID: the object is that node;</li>
 * <li>otherwise by rdf:parseType: plain (lexical form or nested node element),
 * Resource, Literal or Collection.</li>
 * </ul>
 * At most one of rdf:resource, rdf:nodeID and rdf:parseType may be present.
 */
private void startPropertyElement(String namespaceURI, String localName, String qName, Attributes attributes, Position position) {
if ( TRACE )
trace.printf("Start propertyElement: subject = %s\n", str(currentSubject));
if ( ! allowedPropertyElementURIs(namespaceURI, localName) )
throw RDFXMLparseError("QName not allowed for property: "+qName, position);
if ( isNotRecognizedRDFproperty(namespaceURI, localName) )
RDFXMLparseWarning(qName+" is not a recognized RDF property", position);
if ( qNameMatches(rdfNS, rdfContainerItem, namespaceURI, localName) ) {
// rdf:li - take the next container membership property rdf:_N.
int i = containerPropertyCounter.value++;
String p = rdfNS+"_"+i;
currentProperty = iri(p, position);
} else
currentProperty = qNameToIRI(namespaceURI, localName, position);
if ( TRACE )
trace.printf("Property = %s\n", str(currentProperty));
String dt = attributes.getValue(rdfNS, rdfDatatype);
datatype = (dt != null) ? NodeFactory.getType(dt) : null;
currentEmitter = maybeReifyStatement(attributes, position);
// Resource object and subject of further triples.
// This will be checked for a valid IRI later.
String rdfResourceStr = attributes.getValue(rdfNS, rdfResource);
// Checked if the blank node is created.
String objBlankNodeLabel = attributes.getValue(rdfNS, rdfNodeID);
String parseTypeStr = attributes.getValue(rdfNS, rdfParseType);
Node resourceObj = null;
// rdf:resource, rdf:nodeID and rdf:parseType are mutually exclusive.
if ( rdfResourceStr != null && objBlankNodeLabel != null )
throw RDFXMLparseError("Both rdf:resource and rdf:NodeId on a property element. Only one allowed", position);
if ( rdfResourceStr != null && parseTypeStr != null )
throw RDFXMLparseError("Both rdf:resource and rdf:ParseType on a property element. Only one allowed", position);
if ( objBlankNodeLabel != null && parseTypeStr != null )
throw RDFXMLparseError("Both rdf:NodeId and rdf:ParseType on a property element. Only one allowed", position);
if ( rdfResourceStr != null )
resourceObj = iriResolve(rdfResourceStr, position);
if ( objBlankNodeLabel != null )
resourceObj = blankNode(objBlankNodeLabel, position);
if ( hasPropertyAttributes(attributes, position) ) {
if ( parseTypeStr != null ) {
// rdf:parseType found.
throw RDFXMLparseError("The attribute rdf:parseType is not permitted with property attributes on a property element: "+qName, position);
}
// AND must be empty tag
Node innerSubject = (resourceObj==null) ? blankNode(position) : resourceObj;
processPropertyAttributes(innerSubject, attributes, position);
currentEmitter.emit(currentSubject, currentProperty, innerSubject, position);
return;
}
if ( resourceObj != null ) {
currentEmitter.emit(currentSubject, currentProperty, resourceObj, position);
// And empty tag.
return;
}
ObjectParseType objectParseType = objectParseType(parseTypeStr, position);
switch (objectParseType) {
case Plain:
parserMode(ParserMode.ObjectLex);
accCharacters.setLength(0);
// This may turn into a resource object if a startTag is encountered next.
break;
case Resource:
// Change of subject to a blank node subject.
Node nested = blankNode(position);
if ( TRACE )
trace.printf("Subject = %s\n", str(nested));
currentEmitter.emit(currentSubject, currentProperty, nested, position);
// Clear property now it's been used.
currentProperty = null;
// ... reset the subject
currentSubject = nested;
// There isn't a startElement, endElement pair for parseType=Resource.
// Push a frame here as an implicit node frame because the subject is changing.
// The companion "end frame" is handled in "popParserFrame" which
// checks for parserMode=ObjectParserTypeResource
parserMode(ParserMode.ObjectParserTypeResource);
pushParserFrame();
// ... expect a property element start or an end element.
parserMode(ParserMode.PropertyElement);
// There is nothing else special to do other than the implicit pop.
break;
case Literal:
startXMLLiteral(position);
break;
case Collection:
parserMode(ParserMode.ObjectParseTypeCollection);
// Fresh holder: no item has been seen in this collection yet.
collectionNode = new NodeHolder();
break;
}
}
/** End of a property element. Only traces; there is no other work to do. */
private void endPropertyElement(Position position) {
    if ( ! TRACE )
        return;
    trace.println("endPropertyElement");
}
/** In PropertyElement mode, an end tag with no current property closes the node element. */
private boolean isEndNodeElement() {
    boolean noCurrentProperty = (currentProperty == null);
    return noCurrentProperty;
}
// private String xmlBaseStr(Attributes attributes, Position position) {
// String baseStr = attributes.getValue(xmlNS, xmlBaseLN);
// if ( baseStr == null )
// return null;
// return IRIs.resolve(currentBase, baseStr);
// }
// Start element encountered when expecting a ObjectCollection
/**
 * Start an item of an rdf:parseType="Collection" list.
 * Creates a new list cell, links it from the property (first item) or from the
 * previous cell via rdf:rest, emits rdf:first for the item and then processes
 * the item as a node element.
 */
private void startCollectionItem(String namespaceURI, String localName, String qName, Attributes attributes, Position position) {
    // Finish last list cell, start new one.
    if ( TRACE )
        trace.println("Generate list cell");

    Node precedingCell = collectionNode.node;
    Node newCell = blankNode(position);

    // Either fix up the previous cell or link the list to its origin.
    if ( precedingCell != null )
        emit(precedingCell, Nodes.rest, newCell, position);
    else
        currentEmitter.emit(currentSubject, currentProperty, newCell, position);
    collectionNode.node = newCell;

    // Start the item.
    Node itemSubject = attributesToSubjectNode(attributes, position);
    emit(newCell, RDF.Nodes.first, itemSubject, position);
    startNodeElementWithSubject(itemSubject, namespaceURI, localName, qName, attributes, position);
}
/**
 * Finish an rdf:parseType="Collection" list: terminate the last cell with rdf:nil,
 * or, if there were no items, make rdf:nil the object of the property.
 */
private void endCollectionItem(Position position) {
    if ( TRACE )
        trace.println("endObjectCollectionItem");
    Node lastCell = collectionNode.node;
    if ( lastCell == null ) {
        // Empty list
        emit(currentSubject, currentProperty, Nodes.nil, position);
        return;
    }
    emit(lastCell, Nodes.rest, Nodes.nil, position);
}
/** End of a property whose object is a literal built from the collected characters. */
private void endObjectLexical(Position position) {
    if ( TRACE )
        trace.println("endObjectLexical");
    currentEmitter.emit(currentSubject, currentProperty, generateLiteral(position), position);
    // Finished a triple: reset the character buffer.
    accCharacters.setLength(0);
}
/** End of a property whose object is an XML literal; emits the triple and resets the collection state. */
private void endObjectXMLLiteral(Position position) {
    if ( TRACE )
        trace.println("endObjectXMLLiteral");
    currentEmitter.emit(currentSubject, currentProperty, generateXMLLiteral(position), position);
    // Reset the state used while collecting the XML literal.
    namespaces = Map.of();
    stackNamespaces.clear();
    accCharacters.setLength(0);
}
/** Subject for a node element */
/**
 * Determine the subject of a node element from its attributes.
 * <ul>
 * <li>rdf:about - the IRI, resolved against the base;</li>
 * <li>rdf:ID - the IRI formed from the ID;</li>
 * <li>rdf:nodeID - the blank node with that label;</li>
 * <li>none of these - a fresh blank node.</li>
 * </ul>
 * The specification text implies "latter overrides former", but here (as it seems
 * ARP does) it is an error for more than one of these attributes to be present.
 *
 * @throws RiotException if more than one of rdf:about, rdf:ID and rdf:nodeID is present
 */
private Node attributesToSubjectNode(Attributes attributes, Position position) {
    // Subject
    //
    // If there is an attribute a with a.URI == rdf:ID, then e.subject :=
    // uri(identifier := resolve(e, concat("#", a.string-value))).
    //
    // If there is an attribute a with a.URI == rdf:nodeID, then e.subject :=
    // bnodeid(identifier:=a.string-value).
    //
    // If there is an attribute a with a.URI == rdf:about then e.subject :=
    // uri(identifier := resolve(e, a.string-value)).

    // This will be resolved and checked for a valid IRI later.
    String iriStr = attributes.getValue(rdfNS, rdfAbout);
    // Checked when the IRI is created from the ID.
    String idStr = attributes.getValue(rdfNS, rdfID);
    // Checked when the blank node is created.
    String blankNodelabel = attributes.getValue(rdfNS, rdfNodeID);

    // Most specific message first: all three present.
    if ( blankNodelabel != null && iriStr != null && idStr != null )
        throw RDFXMLparseError("All of rdf:about, rdf:NodeId and rdf:ID found. Must be only one.", position);
    if ( iriStr != null && idStr != null )
        throw RDFXMLparseError("Both rdf:about and rdf:ID found. Must be only one.", position);
    if ( blankNodelabel != null && iriStr != null )
        throw RDFXMLparseError("Both rdf:about and rdf:NodeID found. Must be only one.", position);
    if ( blankNodelabel != null && idStr != null )
        throw RDFXMLparseError("Both rdf:NodeID rdf:ID found. Must be only one.", position);

    if ( iriStr != null )
        return iriResolve(iriStr, position);
    if ( idStr != null )
        return iriFromID(idStr, position);
    if ( blankNodelabel != null )
        return blankNode(blankNodelabel, position);
    // None of the above. It's a fresh blank node.
    return blankNode(position);
}
/**
 * Update the current base and language from xml:base and xml:lang on this element.
 * Each is left unchanged when the corresponding lookup returns null.
 */
private void processBaseAndLang(Attributes attributes, Position position) {
    // Too early.
    IRIx elementBase = xmlBase(attributes, position);
    if ( elementBase != null )
        currentBase = elementBase;

    String elementLang = xmlLang(attributes, position);
    if ( elementLang != null )
        currentLang = elementLang;
}
// Property attributes.
// The checking is done by the call to hasPropertyAttributes.
/**
 * Emit a triple for each property attribute on an element.
 * Attributes that are not property attributes (syntax attributes, xml: attributes,
 * namespace declarations, names with no namespace) are skipped silently: warnings
 * are switched off here because the checking pass is done by hasPropertyAttributes.
 *
 * @param subject    the subject for the generated triples
 * @param attributes the attributes of the element
 * @param position   the input position for output and error reporting
 */
private void processPropertyAttributes(Node subject, Attributes attributes, Position position) {
    for ( int i = 0 ; i < attributes.getLength() ; i++ ) {
        if ( checkPropertyAttribute(attributes, i, false, position) )
            propertyAttribute(subject, attributes, i, position);
    }
}
// Early abort! But also used to avoid creating a Node for
// processPropertyAttributes which has no work to do.
/**
 * Return true if the element has at least one property attribute.
 * Attributes are checked in order with warnings enabled, stopping at the first
 * property attribute found.
 */
private boolean hasPropertyAttributes(Attributes attributes, Position position) {
    for ( int idx = 0 ; idx < attributes.getLength() ; idx++ ) {
        if ( checkPropertyAttribute(attributes, idx, true, position) )
            return true;
    }
    return false;
}
/** Return true if this is a property attribute. */
/**
 * Classify one attribute.
 *
 * @param attributes     the attributes of the element
 * @param index          which attribute to check
 * @param outputWarnings whether to issue the optional warnings (the warning for an
 *                       unrecognized attribute in the XML namespace is always issued)
 * @param position       the input position for warnings and errors
 * @return true if the attribute is a property attribute
 * @throws RiotException if the attribute is an RDF term not allowed as a property attribute
 */
private boolean checkPropertyAttribute(Attributes attributes, int index, boolean outputWarnings, Position position) {
    String namespace = attributes.getURI(index);
    String localName = attributes.getLocalName(index);
    String qName = attributes.getQName(index);

    boolean noNamespace = (namespace == null) || namespace.isEmpty();
    if ( noNamespace ) {
        // In SAX, xmlns: is qname, but namespace and local name are "".
        // An empty local name is an XML namespace declaration - skip it quietly.
        if ( outputWarnings && ! localName.isEmpty() )
            RDFXMLparseWarning("XML attribute '"+qName+"' used for RDF property attribute - ignored", position);
        return false;
    }

    if ( isSyntaxAttribute(namespace, localName) )
        return false;

    if ( ! allowedPropertyAttributeURIs(namespace, localName) )
        throw RDFXMLparseError("Not allowed as a property attribute: '"+qName+"'", position);

    if ( outputWarnings && isNotRecognizedRDFproperty(namespace, localName) )
        RDFXMLparseWarning(qName+" is not a recognized RDF term for a property attribute", position);

    // xml:base, xml:lang, xml:space
    if ( isXMLQName(namespace, localName) )
        return false;

    if ( isXMLNamespace(namespace) ) {
        // Unrecognized qnames in the XMLnamespace are a warning and are ignored.
        RDFXMLparseWarning("Unrecognized XML attribute: '"+qName+"'", position);
        return false;
    }

    // Anything left is a property attribute unless it is a namespace declaration.
    return ! isXMLNamespaceQName(qName);
}
/**
 * Output for a property attribute (already checked).
 * The attribute {@code rdf:type} has its value resolved as an IRI; every other
 * property attribute produces a literal using the current language.
 */
private void propertyAttribute(Node subject, Attributes attributes, int index, Position position) {
    final String ns = attributes.getURI(index);
    final String local = attributes.getLocalName(index);
    final String attrValue = attributes.getValue(index);

    final boolean isRdfType = rdfNS.equals(ns) && rdfType.equals(local);
    if ( isRdfType ) {
        Node typeNode = iriResolve(attrValue, position);
        emit(subject, Nodes.type, typeNode, position);
        return;
    }

    Node predicate = qNameToIRI(ns, local, position);
    Node literalObject = literal(attrValue, currentLang, position);
    emit(subject, predicate, literalObject, position);
}
/** Read the {@code xml:base} attribute, if any, and pass its value to {@code xmlBase(String)}. */
private IRIx xmlBase(Attributes attributes, Position position) {
    return xmlBase(attributes.getValue(xmlNS, xmlBaseLN));
}
/**
 * Resolve a base string against the current base.
 * Returns {@code null} if either the argument or the current base is {@code null}.
 */
private IRIx xmlBase(String baseStr) {
    if ( baseStr == null || currentBase == null ) {
        return null;
    }
    return currentBase.resolve(baseStr);
}
/**
 * Read the {@code xml:base} attribute and resolve it against the current base
 * with {@code IRIs.resolve}; {@code null} if the attribute is absent.
 */
private String xmlBaseStr(Attributes attributes, Position position) {
    final String declared = attributes.getValue(xmlNS, xmlBaseLN);
    return ( declared == null ) ? null : IRIs.resolve(currentBase, declared);
}
/**
 * Read the {@code xml:lang} attribute.
 * We use null for "no language" so that explicit xml:lang="" is different.
 */
private String xmlLang(Attributes attributes, Position position) {
    // The attribute lookup already yields null when the attribute is absent.
    return attributes.getValue(xmlNS, xmlLangLN);
}
/**
 * Convert the value of {@code rdf:parseType} to an {@code ObjectParseType}.
 * A {@code null} string gives {@code ObjectParseType.Plain}; a string that is not
 * the name of an enum constant is a parse error.
 */
private ObjectParseType objectParseType(String parseTypeStr, Position position) {
    if ( parseTypeStr != null ) {
        try {
            return ObjectParseType.valueOf(parseTypeStr);
        } catch (IllegalArgumentException ex) {
            throw RDFXMLparseError("Not a legal value for rdf:parseType: '"+parseTypeStr+"'", position);
        }
    }
    return ObjectParseType.Plain;
}
/**
 * Choose the emitter for a statement: whether to generate the reification as well.
 * With an {@code rdf:ID} attribute the returned emitter calls {@code emitReify};
 * without one it is plain {@code emit}.
 */
private Emitter maybeReifyStatement(Attributes attributes, Position position) {
    final String id = attributes.getValue(rdfNS, rdfID);
    if ( id != null ) {
        // Checked when the resolved IRI is created.
        final Node reifier = iriFromID(id, position);
        return (s, p, o, loc) -> emitReify(reifier, s, p, o, loc);
    }
    return this::emit;
}
// private String xmlBaseStr(Attributes attributes, Position position) {
// String baseStr = attributes.getValue(xmlNS, xmlBaseLN);
// if ( baseStr == null )
// return null;
// return IRIs.resolve(currentBase, baseStr);
// }
/**
 * Build a literal from the accumulated characters: a datatyped literal if
 * {@code datatype} is set, otherwise a literal using the current language.
 */
private Node generateLiteral(Position position) {
    final String lexicalForm = accCharacters.toString();
    if ( datatype == null ) {
        return literal(lexicalForm, currentLang, position);
    }
    return literalDatatype(lexicalForm, datatype, position);
}
/** Build a literal with datatype {@code rdfXmlLiteralDT} from the collected XML literal text. */
private Node generateXMLLiteral(Position position) {
    return literalDatatype(xmlLiteralCollectText(), rdfXmlLiteralDT, position);
}
// ---- SAX
/** Record the SAX locator; {@code position()} reads line and column from it. */
@Override
public void setDocumentLocator(Locator locator) {
    if ( EVENTS ) {
        traceXML.println("setDocumentLocator");
    }
    this.locator = locator;
}
/**
 * Return a {@code Position} holding the line and column currently reported
 * by the SAX locator.
 */
private Position position() {
    // calling it "Location" is unhelpful - automatic imports keeps finding the
    // StAX javax.xml.stream.Location!
    final int line = locator.getLineNumber();
    final int column = locator.getColumnNumber();
    return new Position(line, column);
}
/**
 * SAX callback for the start of a prefix mapping; only traced here.
 * These happen before startElement.
 */
@Override
public void startPrefixMapping(String prefix, String uri) throws SAXException {
    if ( TRACE ) {
        trace.printf("startPrefixMapping: %s: <%s>\n", prefix, uri);
    }
    // Output only the top level prefix mappings.
    // Done in startElement to test for rdf:RDF
}
/** SAX callback for the end of a prefix mapping; only traced. */
@Override
public void endPrefixMapping(String prefix) throws SAXException {
    if ( TRACE ) {
        trace.printf("endPrefixMapping: %s\n", prefix);
    }
}
/**
 * SAX callback for character data.
 * While an XML literal is being collected the text goes to the XML literal
 * collector. Otherwise the parser mode decides: lexical-object text is
 * accumulated, and in the other listed modes only whitespace is allowed.
 */
@Override
public void characters(char[] ch, int start, int length) throws SAXException {
    if ( xmlLiteralCollecting() ) {
        xmlLiteralCollectCharacters(ch, start, length);
        return;
    }

    switch (parserMode) {
        case ObjectLex -> accCharacters.append(ch, start, length);
        case ObjectParseTypeLiteral -> {
            // Dealt with above.
        }
        // Allow whitespace only
        case ObjectParserTypeResource, NodeElement, PropertyElement, ObjectParseTypeCollection -> {
            if ( !isWhitespace(ch, start, length) )
                throw RDFXMLparseError("Non-whitespace text content between element tags: "
                                       + nonWhitespaceForMsg(ch, start, length), position());
        }
        case TOP -> {
            if ( !isWhitespace(ch, start, length) )
                throw RDFXMLparseError("Non-whitespace text content outside element tags: "
                                       + nonWhitespaceForMsg(ch, start, length), position());
        }
    }
}
/**
 * The string for the first non-whitespace index: up to 20 characters of the
 * region {@code ch[start .. start+length)} beginning at its first
 * non-whitespace character. Used to build error messages.
 *
 * @param ch     character buffer
 * @param start  index of the first character of the region
 * @param length number of characters in the region
 * @return at most 20 characters starting at the first non-whitespace character
 * @throws RDFXMLParseException if the region has no non-whitespace character
 */
private static String nonWhitespaceForMsg(char[] ch, int start, int length) {
    final int end = start + length;
    for ( int i = start ; i < end ; i++ ) {
        if ( !Character.isWhitespace(ch[i]) ) {
            // Number of characters left in the region, capped at 20.
            int len = Math.min(20, end - i);
            return new String(ch, i, len);
        }
    }
    throw new RDFXMLParseException("Failed to find any non-whitespace characters");
}
/** SAX callback for ignorable whitespace; only traced. */
@Override
public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
    // NOTE(review): guarded by TRACE although it writes to traceXML - confirm this is intended.
    if ( TRACE ) {
        traceXML.println("ignorableWhitespace");
    }
}
/**
 * Test whether every character of {@code ch[start .. start+length)} is
 * whitespace according to {@link Character#isWhitespace(char)}.
 * An empty region counts as whitespace.
 */
private static boolean isWhitespace(char[] ch, int start, int length) {
    final int end = start + length;
    int idx = start;
    while ( idx < end ) {
        if ( !Character.isWhitespace(ch[idx]) ) {
            return false;
        }
        idx++;
    }
    return true;
}
/** Test whether every character of the sequence is whitespace; an empty sequence counts as whitespace. */
private static boolean isWhitespace(CharSequence chars) {
    final int n = chars.length();
    int idx = 0;
    while ( idx < n ) {
        if ( !isWhitespace(chars.charAt(idx)) ) {
            return false;
        }
        idx++;
    }
    return true;
}
/** Single character whitespace test; delegates to {@link Character#isWhitespace(char)}. */
private static boolean isWhitespace(char ch) {
    final boolean whitespace = Character.isWhitespace(ch);
    return whitespace;
}
/** Format the current position as {@code [line:L, col:C]}, or {@code [?, ?]} if there is no position. */
private String here() {
    final Position pos = position();
    return ( pos == null )
        ? "[?, ?]"
        : String.format("[line:%d, col:%d]", pos.line(), pos.column());
}
// ---- Parser output
/**
 * Send one triple to the destination.
 * All four arguments must be non-null; the position is checked but otherwise unused here.
 */
private void emit(Node subject, Node property, Node object, Position position) {
    Objects.requireNonNull(subject, "subject");
    Objects.requireNonNull(property, "property");
    Objects.requireNonNull(object, "object");
    Objects.requireNonNull(position, "position");
    // out.printf("Triple: %s %s %s %s\n", str(position), str(subject),
    // str(property), str(object));
    final Triple triple = Triple.create(subject, property, object);
    destination.triple(triple);
}
/**
 * Emit the triple and, when {@code reify} is not null, the four triples
 * (type, subject, predicate, object) describing it as a statement about {@code reify}.
 */
private void emitReify(Node reify, Node subject, Node property, Node object, Position position) {
    emit(subject, property, object, position);
    if ( reify == null ) {
        return;
    }
    emit(reify, Nodes.type, Nodes.Statement, position);
    emit(reify, Nodes.subject, subject, position);
    emit(reify, Nodes.predicate, property, position);
    emit(reify, Nodes.object, object, position);
}
/** Pass a base declaration to the destination; the position is not used. */
private void emitBase(String base, Position position) {
    this.destination.base(base);
}
/** Pass a prefix declaration to the destination; the position is not used. */
private void emitPrefix(String prefix, String iriStr, Position position) {
    this.destination.prefix(prefix, iriStr);
}
// ---- Creating terms.
/** Create the IRI node for a namespace and local name pair. */
private Node qNameToIRI(String namespace, String localName, Position position) {
    return iri(qNameToIRI(namespace, localName), position);
}
/** This is the RDF rule for creating an IRI from QName: namespace and local name are concatenated. */
private String qNameToIRI(String namespace, String localName) {
    return namespace + localName;
}
/** Create an IRI node from a string without resolving it; both arguments must be non-null. */
private Node iri(String uriStr, Position position) {
    Objects.requireNonNull(uriStr);
    Objects.requireNonNull(position);
    final Node node = createURI(uriStr, position);
    return node;
}
/**
 * Create the IRI node for an {@code rdf:ID} value by resolving {@code "#"+idStr}.
 * The id is checked as an NCName, and a warning is issued when
 * {@code previousUseOfID} reports an earlier position for it.
 */
private Node iriFromID(String idStr, Position position) {
    checkValidNCName(idStr, position);
    final Position earlier = previousUseOfID(idStr, position);
    if ( earlier != null ) {
        // Already in use
        RDFXMLparseWarning("Reuse of rdf:ID '"+idStr+"' at "+str(earlier), position);
    }
    return iriResolve("#"+idStr, position);
}
/**
 * Resolve an IRI string with {@code resolveIRI} and create the node for the result.
 *
 * @param uriStr   IRI string, possibly relative; must not be null
 * @param position parse position used for reporting; must not be null
 * @return the node created by {@code createURI} for the resolved string
 */
private Node iriResolve(String uriStr, Position position) {
    Objects.requireNonNull(uriStr);
    Objects.requireNonNull(position);
    String resolved = resolveIRI(uriStr, position);
    return createURI(resolved, position);
}
/**
 * Resolve an IRI string to its string form.
 * Strings starting {@code "_:"} are returned as given.
 */
private String resolveIRI(String uriStr, Position position) {
    // <_:label> syntax. Handled by the FactoryRDF via the parser profile.
    final boolean blankNodeSyntax = uriStr.startsWith("_:");
    return blankNodeSyntax ? uriStr : resolveIRIx(uriStr, position).str();
}
/**
 * Resolve an IRI string against the current base.
 * Without a current base the string must not be a relative IRI.
 * An {@code IRIException} is reported as a parse error carrying its message.
 */
private IRIx resolveIRIx(String uriStr, Position position) {
    try {
        if ( currentBase == null ) {
            final IRIx unresolved = IRIx.create(uriStr);
            if ( unresolved.isRelative() )
                throw RDFXMLparseError("Base URI is null, but there are relative URIs to resolve" , position);
            return unresolved;
        }
        return currentBase.resolve(uriStr);
    } catch (IRIException ex) {
        throw RDFXMLparseError(ex.getMessage(), position);
    }
}
/** Create the IRI node through the parser profile. Done in accordance to the parser profile policy. */
private Node createURI(String iriStr, Position position) {
    // Checking
    return parserProfile.createURI(iriStr, position.line(), position.column());
}
// private Node iriDirect(String uriStr, Position position) {
// Objects.requireNonNull(uriStr);
// // No checking.
// return createURIdirect(uriStr, position);
// }
//
// /** Always done without checking. */
// private Node createURIdirect(String uriStr, Position position) {
// return factory.createURI(uriStr);
// }
/** Create a blank node through the parser profile, without a label. */
private Node blankNode(Position position) {
    Objects.requireNonNull(position);
    return parserProfile.createBlankNode(null, position.line(), position.column());
}
/** Issue a warning if the string is not accepted by {@code XML11Char.isXML11ValidNCName}. */
private void checkValidNCName(String string, Position position) {
    //boolean isValid = XMLChar.isValidNCName(string);
    if ( XML11Char.isXML11ValidNCName(string) ) {
        return;
    }
    RDFXMLparseWarning("Not a valid XML NCName: '"+string+"'", position);
}
/** Test whether the namespace URI equals {@code rdfNS}. */
private static boolean isRDF(String namespaceURI) {
    final boolean sameNamespace = rdfNS.equals(namespaceURI);
    return sameNamespace;
}
/** Test for {@code rdf:_NNNNN}: the RDF namespace together with a member-property local name. */
private static boolean isMemberProperty(String namespaceURI, String localName) {
    return isRDF(namespaceURI) && isMemberPropertyLocalName(localName);
}
/**
 * Test whether a local name has the form {@code _N}, where {@code N} is one or
 * more ASCII decimal digits and does not start with {@code 0}.
 * <p>
 * The digits are checked character by character. There is no upper limit on
 * {@code N}, and a sign or a digit outside {@code '0'..'9'} is rejected.
 *
 * @param localName the local name to test
 * @return true if the local name is {@code _} followed by a positive decimal integer
 */
private static boolean isMemberPropertyLocalName(String localName) {
    final int len = localName.length();
    // Needs "_" and at least one digit.
    if ( len < 2 || localName.charAt(0) != '_' )
        return false;
    // No leading zero, which also excludes zero itself.
    if ( localName.charAt(1) == '0' )
        return false;
    for ( int i = 1 ; i < len ; i++ ) {
        char c = localName.charAt(i);
        if ( c < '0' || c > '9' )
            return false;
    }
    return true;
}
// Local names that the "is not recognized" tests below accept without a warning.
// "nil" is in the W3C RDF test suite
private static final Set<String> knownRDF = Set.of
("Bag", "Seq", "Alt", "List","XMLLiteral", "Property", "Statement",
"type", "li", "subject", "predicate","object","value","first","rest", "nil");
// The property check and the type check currently share the same set of names.
private static final Set<String> knownRDFProperties = knownRDF;
private static final Set<String> knownRDFTypes = knownRDF;
/**
 * Return false if acceptable: not the RDF namespace, or in the RDF namespace and
 * recognized for a type.
 * If return true, issue a warning.
 */
private boolean isNotRecognizedRDFtype(String namespaceURI, String localName) {
    return isRDF(namespaceURI) && !knownRDFTypes.contains(localName);
}
/**
 * Return true for a name in the RDF namespace that is neither a member property
 * local name nor one of the known RDF property names.
 */
private boolean isNotRecognizedRDFproperty(String namespaceURI, String localName) {
    if ( !isRDF(namespaceURI) || isMemberPropertyLocalName(localName) ) {
        return false;
    }
    return !knownRDFProperties.contains(localName);
}
/** Create a blank node for a label through the parser profile; the label is first checked as an NCName. */
private Node blankNode(String label, Position position) {
    Objects.requireNonNull(label);
    Objects.requireNonNull(position);
    // RDF/XML restriction.
    checkValidNCName(label, position);
    return parserProfile.createBlankNode(null, label, position.line(), position.column());
}
/** Create a string literal through the parser profile. */
private Node literal(String lexical, Position position) {
    Objects.requireNonNull(lexical);
    Objects.requireNonNull(position);
    return parserProfile.createStringLiteral(lexical, position.line(), position.column());
}
/**
 * Create literal with a language (rdf:langString). If lang is null or "", create
 * an xsd:string
 */
private Node literal(String lexical, String lang, Position position) {
    final boolean noLanguage = ( lang == null || lang.isEmpty() );
    if ( noLanguage ) {
        return literal(lexical, position);
    }
    Objects.requireNonNull(lexical);
    Objects.requireNonNull(position);
    return parserProfile.createLangLiteral(lexical, lang, position.line(), position.column());
}
/** Create a typed literal; the datatype is obtained from its URI string with {@code NodeFactory.getType}. */
private Node literalDatatype(String lexical, String datatype, Position position) {
    Objects.requireNonNull(lexical);
    Objects.requireNonNull(datatype);
    Objects.requireNonNull(position);
    final int lineNum = position.line();
    final int colNum = position.column();
    return parserProfile.createTypedLiteral(lexical, NodeFactory.getType(datatype), lineNum, colNum);
}
/** Create a typed literal through the parser profile for a given datatype object. */
private Node literalDatatype(String lexical, RDFDatatype datatype, Position position) {
    Objects.requireNonNull(lexical);
    Objects.requireNonNull(datatype);
    Objects.requireNonNull(position);
    return parserProfile.createTypedLiteral(lexical, datatype, position.line(), position.column());
}
// ---- Functions
/**
 * Compare two names given as namespace and local name; nulls are compared with
 * {@link Objects#equals(Object, Object)}.
 */
private boolean qNameMatches(String ns1, String local1, String ns2, String local2) {
    // QName actually ignores prefix for QName.equals.
    if ( !Objects.equals(ns1, ns2) ) {
        return false;
    }
    return Objects.equals(local1, local2);
}
// ---- Development
/** Increase the trace indentation; does nothing unless {@code TRACE} is set. */
private void incIndent() {
    if ( TRACE ) {
        trace.incIndent();
    }
}
/** Decrease the trace indentation; does nothing unless {@code TRACE} is set. */
private void decIndent() {
    if ( TRACE ) {
        trace.decIndent();
    }
}
// ---- RDF XML Literal
/**
 * Begin collecting an XML literal: switch the parser mode, record the current
 * element depth as the start depth and clear the character accumulator.
 */
private void startXMLLiteral(Position position) {
    if ( TRACE ) {
        trace.printf("Start XML Literal : depth=%d\n", elementDepth);
    }
    incIndent();
    parserMode(ParserMode.ObjectParseTypeLiteral);
    this.xmlLiteralStartDepth = elementDepth;
    this.accCharacters.setLength(0);
}
/** Finish collecting an XML literal by resetting the start depth to -1. */
private void endXMLLiteral(Position position) {
    decIndent();
    if ( TRACE ) {
        trace.printf("End XML Literal : depth=%d\n", elementDepth);
    }
    this.xmlLiteralStartDepth = -1;
}
/** Return the accumulated characters as the lexical form, passed through {@code xmlEscapeStrText}. */
private String xmlLiteralCollectText() {
    return xmlEscapeStrText(accCharacters);
}
/** Building an RDF XML Literal: true while the recorded start depth is greater than zero. */
private boolean xmlLiteralCollecting() {
    final boolean collecting = ( xmlLiteralStartDepth > 0 );
    return collecting;
}
/** XML text, not XML Literal, not in an attribute. The text is returned as a string, unchanged. */
private String xmlEscapeStrText(CharSequence stringAcc) {
    // Nothing to do.
    final String text = stringAcc.toString();
    return text;
}
// ---- RDF XML Literals
// Markup delimiters written when XML literal content is rebuilt as text.
private static final String openStartTag = "<";
private static final String closeStartTag = ">";
private static final String openEndTag = "</";
private static final String closeEndTag = ">";
// Namespace declarations noted so far for the XML literal, keyed by the
// declaring attribute name ("xmlns" or "xmlns:prefix").
private Map<String, String> namespaces = Map.of();
// Saved namespace maps: pushed at the start of an element inside an XML literal
// and popped at its end.
private Deque<Map<String, String>> stackNamespaces = new ArrayDeque<>();
/**
 * Append the start tag of an element inside an XML literal: the qname,
 * the namespace declarations it needs, then its attributes.
 * The namespace map in force is saved on the stack and a mutable copy
 * becomes the current map.
 */
private void xmlLiteralCollectStartElement(String namespaceURI, String localName, String qName, Attributes attributes) {
    if ( TRACE ) {
        trace.printf("XML Literal[%s]: depth=%d\n", qName, elementDepth);
    }
    incIndent();
    incElementDepth();

    stackNamespaces.push(namespaces);
    namespaces = new HashMap<>(namespaces);

    // Sorted map: declarations to write on this element.
    final Map<String, String> declarations = new TreeMap<>();
    accCharacters.append(openStartTag).append(qName);
    xmlLiteralNamespaces(declarations, namespaceURI, localName, qName, attributes);
    xmlLiteralAttributes(attributes);
    accCharacters.append(closeStartTag);
}
/**
 * Work out the namespace declarations needed by an element and its attributes
 * and append them to the accumulated text.
 * Declarations are written in the iteration order of {@code outputNS}; an entry
 * with an empty URI is not written.
 */
private void xmlLiteralNamespaces(Map<String, String> outputNS, String namespaceURI, String localName, String qName, Attributes attributes) {
    xmlLiteralNamespacesForQName(outputNS, namespaceURI, localName, qName);
    // Needs more. Determine namespace for attributes
    final int count = attributes.getLength();
    for ( int idx = 0 ; idx < count ; idx++ ) {
        final String attrQName = attributes.getQName(idx);
        final String attrURI = attributes.getURI(idx);
        if ( attrURI.isEmpty() )
            continue;
        // namespaces handled separately.
        final boolean isDeclaration = attrQName.equals("xmlns") || attrQName.startsWith("xmlns:");
        if ( isDeclaration )
            // Not namespaces.
            continue;
        xmlLiteralNamespacesForQName(outputNS, attrURI, attributes.getLocalName(idx), attrQName);
    }
    // Output.
    for ( Map.Entry<String, String> declaration : outputNS.entrySet() ) {
        final String uri = declaration.getValue();
        // Unset default namespace
        if ( uri.isEmpty() )
            continue;
        accCharacters.append(" ")
                     .append(declaration.getKey())
                     .append("=\"")
                     .append(uri)
                     .append("\"");
    }
}
/**
 * Process one QName - insert a namespace if the prefix is not in scope as given by the
 * namespace mapping, or is in scope with a different namespace URI.
 * The prefix-to-URI pair is added both to {@code outputNS} and to {@code namespaces}.
 */
private void xmlLiteralNamespaceQName(Map<String, String> outputNS, Map<String, String> namespaces, NamespaceContext nsCxt, QName qName) {
    final String pfx = qName.getPrefix();
    final String uri = nsCxt.getNamespaceURI(pfx);
    final boolean alreadyInScope = namespaces.containsKey(pfx) && namespaces.get(pfx).equals(uri);
    if ( alreadyInScope ) {
        return;
    }
    // Define in current XML subtree.
    outputNS.put(pfx, uri);
    namespaces.put(pfx, uri);
}
/**
 * Append the attributes of an element inside an XML literal, sorted by qname,
 * with escaped values. Attributes named {@code xmlns} or {@code xmlns:...} are left out.
 */
private void xmlLiteralAttributes(Attributes attributes) {
    // Map qname -> index, sorted by qname
    final Map<String, Integer> indexByQName = new TreeMap<>();
    final int count = attributes.getLength();
    for ( int i = 0 ; i < count ; i++ ) {
        final String attrQName = attributes.getQName(i);
        final boolean isDeclaration = attrQName.equals("xmlns") || attrQName.startsWith("xmlns:");
        if ( !isDeclaration ) {
            indexByQName.put(attrQName, i);
        }
    }
    for ( int idx : indexByQName.values() ) {
        final String name = attributes.getQName(idx);
        final String value = attributes.getValue(idx);
        accCharacters.append(" ")
                     .append(name)
                     .append("=\"")
                     .append(xmlLiteralEscapeAttr(value))
                     .append("\"");
    }
}
// ---- RDF Collections
/**
 * Append the end tag of an element inside an XML literal and restore the
 * namespace map that was saved at the matching start element.
 */
private void xmlLiteralCollectEndElement(String namespaceURI, String localName, String qName) {
    accCharacters.append(openEndTag).append(qName).append(closeEndTag);
    this.namespaces = stackNamespaces.pop();
    decElementDepth();
    decIndent();
    if ( TRACE ) {
        trace.printf("XML Literal[/%s]: depth=%d\n", qName, elementDepth);
    }
}
/** Append character data inside an XML literal, escaped with {@code xmlLiteralEscapeText}. */
private void xmlLiteralCollectCharacters(char[] ch, int start, int length) {
    if ( TRACE ) {
        trace.printf("XML Literal Characters: depth=%d\n", elementDepth);
    }
    final String raw = new String(ch, start, length);
    accCharacters.append(xmlLiteralEscapeText(raw));
}
/**
 * Note a namespace if not already set.
 * It is added to the namespaces recorded and also the output (sorted) set.
 * The key is the declaring attribute name: {@code xmlns} when the qname has no
 * prefix, otherwise {@code xmlns:} followed by the prefix.
 */
private void xmlLiteralNamespacesForQName(Map<String, String> outputNS, String namespaceURI, String localName, String qName) {
    // Find xmlns key.
    final int colon = qName.indexOf(':');
    final String nsAttr = ( colon < 1 ) ? "xmlns" : "xmlns:"+qName.substring(0, colon);

    final boolean alreadySet = namespaces.containsKey(nsAttr) && namespaces.get(nsAttr).equals(namespaceURI);
    if ( alreadySet ) {
        return;
    }
    // Update the current namespaces mapping.
    namespaces.put(nsAttr, namespaceURI);
    outputNS.put(nsAttr, namespaceURI);
}
/**
 * Escape text used in an XML content.
 * Escapes aligned to ARP: {@code &}, {@code <} and {@code >} are replaced by
 * entity references; quote characters are left alone.
 */
private String xmlLiteralEscapeText(CharSequence stringAcc) {
    final StringBuilder out = new StringBuilder();
    for ( int i = 0 ; i < stringAcc.length() ; i++ ) {
        final char c = stringAcc.charAt(i);
        switch (c) {
            case '&' -> out.append("&amp;");
            case '<' -> out.append("&lt;");
            case '>' -> out.append("&gt;");
            // '"' and '\'' are not escaped in content.
            default -> out.append(c);
        }
    }
    return out.toString();
}
/**
 * Escape text used in an XML attribute value.
 * Escapes aligned to ARP: {@code &}, {@code <} and {@code "} are replaced by
 * entity references; {@code >} and the single quote are left alone.
 */
private String xmlLiteralEscapeAttr(CharSequence stringAcc) {
    final StringBuilder out = new StringBuilder();
    for ( int i = 0 ; i < stringAcc.length() ; i++ ) {
        final char c = stringAcc.charAt(i);
        switch (c) {
            case '&' -> out.append("&amp;");
            case '<' -> out.append("&lt;");
            case '"' -> out.append("&quot;");
            // '>' and '\'' are not escaped in attribute values.
            default -> out.append(c);
        }
    }
    return out.toString();
}
// -- SAX Operations not handled (org.xml.sax.ext.DefaultHandler2)
/**
 * SAX callback for a processing instruction.
 * Inside an XML literal it is copied into the accumulated text; elsewhere it is
 * ignored with a warning.
 */
@Override
public void processingInstruction(String target, String data) throws SAXException {
    if ( xmlLiteralCollecting() ) {
        accCharacters.append("<?").append(target).append(' ').append(data).append("?>");
        return;
    }
    if ( EVENTS ) {
        traceXML.println("processingInstruction");
    }
    RDFXMLparseWarning("XML Processing instruction - ignored", position());
}
/** SAX callback for a skipped entity; only traced. */
@Override
public void skippedEntity(String name) throws SAXException {
    if ( EVENTS ) {
        traceXML.println("skippedEntity");
    }
}
// ---- ErrorHandler
/** XML parser warning: passed to the error handler as a warning with the line and column of the exception. */
@Override
public void warning(SAXParseException exception) throws SAXException {
    if ( EVENTS ) {
        traceXML.println("warning");
    }
    final String message = exception.getMessage();
    final int line = exception.getLineNumber();
    final int column = exception.getColumnNumber();
    errorHandler.warning(message, line, column);
}
/**
 * XML parser error: reported to the error handler as fatal, then the
 * exception is rethrown.
 */
@Override
public void error(SAXParseException exception) throws SAXException {
    if ( EVENTS ) {
        traceXML.println("error");
    }
    // No recovery.
    final String message = exception.getMessage();
    final int line = exception.getLineNumber();
    final int column = exception.getColumnNumber();
    errorHandler.fatal(message, line, column);
    throw exception;
}
/**
 * XML parser fatal error: reported to the error handler as fatal, then the
 * exception is rethrown.
 */
@Override
public void fatalError(SAXParseException exception) throws SAXException {
    if ( EVENTS ) {
        traceXML.println("fatalError");
    }
    final String message = exception.getMessage();
    final int line = exception.getLineNumber();
    final int column = exception.getColumnNumber();
    errorHandler.fatal(message, line, column);
    // Should not happen.
    throw exception;
}
// ---- EntityResolver
/** EntityResolver callback: traced, and always answers {@code null}. */
@Override
public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
    if ( EVENTS ) {
        traceXML.println("resolveEntity");
    }
    return null;
}
// ---- EntityResolver2
/** EntityResolver2 callback: traced, and always answers {@code null}. */
@Override
public InputSource resolveEntity(String name, String publicId, String baseURI, String systemId) throws SAXException, IOException {
    if ( EVENTS ) {
        traceXML.println("SAX2-resolveEntity");
    }
    return null;
}
// ---- DTDHandler
/** DTDHandler callback for a notation declaration; only traced. */
@Override
public void notationDecl(String name, String publicId, String systemId) throws SAXException {
    if ( EVENTS ) {
        traceXML.println("notationDecl");
    }
}
/** DTDHandler callback for an unparsed entity declaration; only traced. */
@Override
public void unparsedEntityDecl(String name, String publicId, String systemId, String notationName) throws SAXException {
    if ( EVENTS ) {
        traceXML.println("unparsedEntityDecl");
    }
}
// ---- LexicalHandler
/** LexicalHandler callback for the start of a DTD; only traced, with the system id. */
@Override
public void startDTD(String name, String publicId, String systemId) throws SAXException {
    if ( EVENTS ) {
        traceXML.println("SAX2-startDTD: " + systemId);
    }
}
/** LexicalHandler callback for the end of a DTD; only traced. */
@Override
public void endDTD() throws SAXException {
    if ( EVENTS ) {
        traceXML.println("SAX2-endDTD");
    }
}
/** LexicalHandler callback for the start of an entity; only traced. */
@Override
public void startEntity(String name) throws SAXException {
    if ( EVENTS ) {
        traceXML.println("SAX2-startEntity");
    }
}
/** LexicalHandler callback for the end of an entity; only traced. */
@Override
public void endEntity(String name) throws SAXException {
    if ( EVENTS ) {
        traceXML.println("SAX2-endEntity");
    }
}
/** LexicalHandler callback for the start of a CDATA section; only traced. */
@Override
public void startCDATA() throws SAXException {
    if ( EVENTS ) {
        traceXML.println("SAX2-startCDATA");
    }
}
/** LexicalHandler callback for the end of a CDATA section; only traced. */
@Override
public void endCDATA() throws SAXException {
    if ( EVENTS ) {
        traceXML.println("SAX2-endCDATA");
    }
}
/**
 * LexicalHandler callback for an XML comment.
 * Inside an XML literal the comment is copied into the accumulated text;
 * elsewhere it is only traced.
 */
@Override
public void comment(char[] ch, int start, int length) throws SAXException {
    if ( xmlLiteralCollecting() ) {
        accCharacters.append("<!--").append(ch, start, length).append("-->");
        return;
    }
    if ( EVENTS ) {
        traceXML.println("SAX2-comment");
    }
}
// ---- DeclHandler
/** DeclHandler callback for an element declaration; only traced. */
@Override
public void elementDecl(String name, String model) throws SAXException {
    if ( EVENTS ) {
        traceXML.println("SAX2-elementDecl");
    }
}
/** DeclHandler callback for an attribute declaration; only traced. */
@Override
public void attributeDecl(String eName, String aName, String type, String mode, String value) throws SAXException {
    if ( EVENTS ) {
        traceXML.println("SAX2-attributeDecl");
    }
}
/** DeclHandler callback for an internal entity declaration; only traced. */
@Override
public void internalEntityDecl(String name, String value) throws SAXException {
    if ( EVENTS ) {
        traceXML.println("SAX2-internalEntityDecl");
    }
}
/** DeclHandler callback for an external entity declaration; only traced. */
@Override
public void externalEntityDecl(String name, String publicId, String systemId) throws SAXException {
    if ( EVENTS ) {
        traceXML.println("SAX2-externalEntityDecl");
    }
}
/** EntityResolver2 callback for an external subset: traced, and always answers {@code null}. */
@Override
public InputSource getExternalSubset(String name, String baseURI) throws SAXException, IOException {
    if ( EVENTS ) {
        traceXML.println("SAX2-getExternalSubset");
    }
    return null;
}
}