blob: 8cf8b34899109e34445f14077595cec274d5ca20 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.xml.inference;
import org.apache.nifi.schema.inference.RecordSource;
import org.apache.nifi.xml.processing.ProcessingException;
import org.apache.nifi.xml.processing.stream.StandardXMLEventReaderProvider;
import org.apache.nifi.xml.processing.stream.XMLEventReaderProvider;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import javax.xml.transform.stream.StreamSource;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
public class XmlRecordSource implements RecordSource<XmlNode> {
private final XMLEventReader xmlEventReader;
private final String contentFieldName;
private final boolean parseXmlAttributes;
public XmlRecordSource(final InputStream in, final String contentFieldName, final boolean ignoreWrapper, final boolean parseXmlAttributes) throws IOException {
this.contentFieldName = contentFieldName;
this.parseXmlAttributes = parseXmlAttributes;
try {
final XMLEventReaderProvider provider = new StandardXMLEventReaderProvider();
xmlEventReader = provider.getEventReader(new StreamSource(in));
if (ignoreWrapper) {
readStartElement();
}
} catch (final ProcessingException|XMLStreamException e) {
throw new IOException("Could not parse XML", e);
}
}
@Override
public XmlNode next() throws IOException {
try {
// Find a start element
final StartElement startElement = readStartElement();
if (startElement == null) {
return null;
}
final XmlNode xmlNode = readNext(startElement);
return xmlNode;
} catch (final XMLStreamException xmle) {
throw new IOException(xmle);
}
}
private XmlNode readNext(final StartElement startElement) throws XMLStreamException, IOException {
// Parse everything until we encounter the end element
final StringBuilder content = new StringBuilder();
final Map<String, XmlNode> childNodes = new LinkedHashMap<>();
if (parseXmlAttributes) {
addXmlAttributesToChildNodes(startElement, childNodes);
}
while (xmlEventReader.hasNext()) {
final XMLEvent xmlEvent = xmlEventReader.nextEvent();
if (xmlEvent.isEndDocument()) {
throw new EOFException("Expected to encounter End-of-Element tag for start tag '" + startElement.getName() + "'");
}
if (xmlEvent.isEndElement()) {
break;
}
if (xmlEvent.isCharacters()) {
final Characters characters = xmlEvent.asCharacters();
if (!characters.isWhiteSpace()) {
content.append(characters.getData());
}
}
if (xmlEvent.isStartElement()) {
final StartElement childStartElement = xmlEvent.asStartElement();
final XmlNode childNode = readNext(childStartElement);
final String childName = childStartElement.getName().getLocalPart();
final XmlNode existingNode = childNodes.get(childName);
if (existingNode == null) {
childNodes.put(childName, childNode);
} else if (existingNode.getNodeType() == XmlNodeType.ARRAY) {
((XmlArrayNode) existingNode).addElement(childNode);
} else {
final XmlArrayNode arrayNode = new XmlArrayNode(childStartElement.getName().getLocalPart());
arrayNode.addElement(existingNode);
arrayNode.addElement(childNode);
childNodes.put(childName, arrayNode);
}
}
}
final String nodeName = startElement.getName().getLocalPart();
if (childNodes.isEmpty()) {
return new XmlTextNode(nodeName, content.toString().trim());
} else {
final String textContent = content.toString().trim();
if (!textContent.equals("")) {
childNodes.put(contentFieldName, new XmlTextNode(contentFieldName, textContent));
}
return new XmlContainerNode(nodeName, childNodes);
}
}
private StartElement readStartElement() throws XMLStreamException {
while (xmlEventReader.hasNext()) {
final XMLEvent xmlEvent = xmlEventReader.nextEvent();
if (xmlEvent.isStartElement()) {
final StartElement startElement = xmlEvent.asStartElement();
return startElement;
}
}
return null;
}
private void addXmlAttributesToChildNodes(StartElement startElement, Map<String, XmlNode> childNodes) {
final Iterator<?> attributeIterator = startElement.getAttributes();
while (attributeIterator.hasNext()) {
final Attribute attribute = (Attribute) attributeIterator.next();
final String attributeName = attribute.getName().getLocalPart();
childNodes.put(attributeName, new XmlTextNode(attributeName, attribute.getValue()));
}
}
}