| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| package org.apache.sling.jcr.contentloader.internal.readers; |
| |
| import java.io.BufferedInputStream; |
| import java.io.Closeable; |
| import java.io.File; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.PipedInputStream; |
| import java.io.PipedOutputStream; |
| import java.net.MalformedURLException; |
| import java.net.URL; |
| import java.text.DateFormat; |
| import java.text.ParseException; |
| import java.text.SimpleDateFormat; |
| import java.util.ArrayList; |
| import java.util.Calendar; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Set; |
| import java.util.regex.Matcher; |
| import java.util.regex.Pattern; |
| |
| import javax.jcr.PropertyType; |
| import javax.jcr.RepositoryException; |
| import javax.xml.transform.Source; |
| import javax.xml.transform.Templates; |
| import javax.xml.transform.TransformerConfigurationException; |
| import javax.xml.transform.TransformerException; |
| import javax.xml.transform.TransformerFactory; |
| import javax.xml.transform.stream.StreamResult; |
| import javax.xml.transform.stream.StreamSource; |
| |
| import org.apache.felix.scr.annotations.Activate; |
| import org.apache.felix.scr.annotations.Component; |
| import org.apache.felix.scr.annotations.Properties; |
| import org.apache.felix.scr.annotations.Property; |
| import org.apache.felix.scr.annotations.Service; |
| import org.apache.sling.jcr.contentloader.ContentCreator; |
| import org.apache.sling.jcr.contentloader.ContentReader; |
| import org.kxml2.io.KXmlParser; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| import org.xmlpull.v1.XmlPullParser; |
| import org.xmlpull.v1.XmlPullParserException; |
| |
| /** |
| * This reader reads an xml file defining the content. The xml format should have this |
| * format: |
| * |
| * <pre> |
| * <node> |
| * <name>the name of the node</name> |
| * <primaryNodeType>type</primaryNodeType> |
| * <mixinNodeTypes> |
| * <mixinNodeType>mixtype1</mixinNodeType> |
| * <mixinNodeType>mixtype2</mixinNodeType> |
| * </mixinNodeTypes> |
| * <properties> |
| * <property> |
| * <name>propName</name> |
| * <value>propValue</value> |
| * or |
| * <values> |
| * <value/> for multi value properties |
| * </values> |
| * <type>propType</type> |
| * </property> |
| * <!-- more properties --> |
| * </properties> |
| * <nodes> |
| * <!-- child nodes --> |
| * <node> |
| * .. |
| * </node> |
| * </nodes> |
| * </node> |
| * </pre> |
| * |
| * If you want to include a binary file in your loaded content, you may specify it using a |
| * {@link org.apache.sling.jcr.contentloader.internal.readers.XmlReader.FileDescription <code><nt:file></code>} element. |
| */ |
| @Component |
| @Service |
| @Properties({ |
| @Property(name = ContentReader.PROPERTY_EXTENSIONS, value = "xml"), |
| @Property(name = ContentReader.PROPERTY_TYPES, value = {"application/xml", "text/xml"}) |
| }) |
| public class XmlReader implements ContentReader { |
| |
| /* |
| * <node> <primaryNodeType>type</primaryNodeType> <mixinNodeTypes> |
| * <mixinNodeType>mixtype1</mixinNodeType> <mixinNodeType>mixtype2</mixinNodeType> |
| * </mixinNodeTypes> <properties> <property> <name>propName</name> |
| * <value>propValue</value> <type>propType</type> </property> <!-- more |
| * --> </properties> </node> |
| */ |
| |
| /** default log */ |
| private static final Logger logger = LoggerFactory.getLogger(XmlReader.class); |
| |
| private static final String ELEM_NODE = "node"; |
| |
| private static final String ELEM_PRIMARY_NODE_TYPE = "primaryNodeType"; |
| |
| private static final String ELEM_MIXIN_NODE_TYPE = "mixinNodeType"; |
| |
| private static final String ELEM_PROPERTY = "property"; |
| |
| private static final String ELEM_NAME = "name"; |
| |
| private static final String ELEM_VALUE = "value"; |
| |
| private static final String ELEM_VALUES = "values"; |
| |
| private static final String ELEM_TYPE = "type"; |
| |
| private static final String XML_STYLESHEET_PROCESSING_INSTRUCTION = "xml-stylesheet"; |
| |
| private static final String HREF_ATTRIBUTE = "href"; |
| |
| private static final String ELEM_FILE_NAMESPACE = "http://www.jcp.org/jcr/nt/1.0"; |
| private static final String ELEM_FILE_NAME = "file"; |
| |
| private KXmlParser xmlParser; |
| |
| @Activate |
| protected void activate() { |
| this.xmlParser = new KXmlParser(); |
| try { |
| // Make namespace-aware |
| this.xmlParser.setFeature(XmlPullParser.FEATURE_PROCESS_NAMESPACES, true); |
| } catch (XmlPullParserException e) { |
| throw new RuntimeException(e); |
| } |
| } |
| |
| // ---------- XML content access ------------------------------------------- |
| |
| |
| /** |
| * @see org.apache.sling.jcr.contentloader.ContentReader#parse(URL, org.apache.sling.jcr.contentloader.ContentCreator) |
| */ |
| public synchronized void parse(final URL url, final ContentCreator creator) |
| throws IOException, RepositoryException { |
| BufferedInputStream bufferedInput = null; |
| try { |
| // We need to buffer input, so that we can reset the stream if we encounter an XSL stylesheet reference |
| bufferedInput = new BufferedInputStream(url.openStream()); |
| parseInternal(bufferedInput, creator, url); |
| } catch (XmlPullParserException xppe) { |
| throw (IOException) new IOException(xppe.getMessage()).initCause(xppe); |
| } finally { |
| closeStream(bufferedInput); |
| } |
| } |
| |
| /* (non-Javadoc) |
| * @see org.apache.sling.jcr.contentloader.ContentReader#parse(java.io.InputStream, org.apache.sling.jcr.contentloader.ContentCreator) |
| */ |
| public void parse(InputStream ins, ContentCreator creator) |
| throws IOException, RepositoryException { |
| BufferedInputStream bufferedInput = null; |
| try { |
| // We need to buffer input, so that we can reset the stream if we encounter an XSL stylesheet reference |
| bufferedInput = new BufferedInputStream(ins); |
| URL xmlLocation = null; |
| parseInternal(bufferedInput, creator, xmlLocation); |
| } catch (XmlPullParserException xppe) { |
| throw (IOException) new IOException(xppe.getMessage()).initCause(xppe); |
| } finally { |
| closeStream(bufferedInput); |
| } |
| } |
| |
| private void parseInternal(final InputStream bufferedInput, |
| final ContentCreator creator, |
| final URL xmlLocation) |
| throws XmlPullParserException, IOException, RepositoryException { |
| final StringBuilder contentBuffer = new StringBuilder(); |
| // Mark the beginning of the stream. We assume that if there's an XSL processing instruction, |
| // it will occur in the first gulp - which makes sense, as processing instructions must be |
| // specified before the root element of an XML file. |
| bufferedInput.mark(bufferedInput.available()); |
| // set the parser input, use null encoding to force detection with |
| // <?xml?> |
| this.xmlParser.setInput(bufferedInput, null); |
| |
| NodeDescription.SHARED.clear(); |
| PropertyDescription.SHARED.clear(); |
| FileDescription.SHARED.clear(); |
| |
| NodeDescription currentNode = null; |
| PropertyDescription currentProperty = null; |
| String currentElement; |
| |
| |
| int eventType = this.xmlParser.getEventType(); |
| while (eventType != XmlPullParser.END_DOCUMENT) { |
| if (eventType == XmlPullParser.PROCESSING_INSTRUCTION) { |
| ProcessingInstruction pi = new ProcessingInstruction(this.xmlParser.getText()); |
| // Look for a reference to an XSL stylesheet |
| if (pi.getName().equals(XML_STYLESHEET_PROCESSING_INSTRUCTION) && xmlLocation != null ) { |
| // Rewind the input stream to the beginning, so that it can be transformed with XSL |
| bufferedInput.reset(); |
| // Pipe the XML input through the XSL transformer |
| XslTransformerStream transformerStream = new XslTransformerStream(bufferedInput, pi.getAttribute(HREF_ATTRIBUTE), xmlLocation); |
| // Start the transformer thread |
| transformerStream.startTransform(); |
| // Re-run the XML parser, now with the transformed XML |
| parseInternal(transformerStream, creator, xmlLocation); |
| transformerStream.close(); |
| return; |
| |
| } |
| } |
| if (eventType == XmlPullParser.START_TAG) { |
| |
| currentElement = this.xmlParser.getName(); |
| |
| if (ELEM_PROPERTY.equals(currentElement)) { |
| currentNode = NodeDescription.create(currentNode, creator); |
| currentProperty = PropertyDescription.SHARED; |
| } else if (ELEM_NODE.equals(currentElement)) { |
| currentNode = NodeDescription.create(currentNode, creator); |
| currentNode = NodeDescription.SHARED; |
| } else if (ELEM_FILE_NAME.equals(currentElement) && ELEM_FILE_NAMESPACE.equals(this.xmlParser.getNamespace())) { |
| if (xmlLocation != null) { |
| int attributeCount = this.xmlParser.getAttributeCount(); |
| if (attributeCount < 2 || attributeCount > 3) { |
| throw new IOException("File element must have these attributes: url, mimeType and lastModified: " + xmlLocation); |
| } |
| try { |
| AttributeMap attributes = AttributeMap.getInstance(); |
| attributes.setValues(xmlParser); |
| FileDescription.SHARED.setBaseLocation(xmlLocation); |
| FileDescription.SHARED.setValues(attributes); |
| attributes.clear(); |
| } catch (ParseException e) { |
| IOException ioe = new IOException("Error parsing file description: " + xmlLocation); |
| ioe.initCause(e); |
| throw ioe; |
| } |
| FileDescription.SHARED.create(creator); |
| FileDescription.SHARED.clear(); |
| } else { |
| logger.warn("file element encountered when xml location isn't known. skipping."); |
| } |
| } |
| |
| } else if (eventType == XmlPullParser.END_TAG) { |
| |
| String qName = this.xmlParser.getName(); |
| String content = contentBuffer.toString().trim(); |
| contentBuffer.delete(0, contentBuffer.length()); |
| |
| if (ELEM_PROPERTY.equals(qName)) { |
| currentProperty = PropertyDescription.create(currentProperty, creator); |
| |
| } else if (ELEM_NAME.equals(qName)) { |
| if (currentProperty != null) { |
| currentProperty.name = content; |
| } else if (currentNode != null) { |
| currentNode.name = content; |
| } |
| |
| } else if (ELEM_VALUE.equals(qName)) { |
| if ( currentProperty == null ) { |
| throw new IOException("XML file does not seem to contain valid content xml. Unexpected " + ELEM_VALUE + " element in : " + xmlLocation); |
| } |
| currentProperty.addValue(content); |
| |
| } else if (ELEM_VALUES.equals(qName)) { |
| if ( currentProperty == null ) { |
| throw new IOException("XML file does not seem to contain valid content xml. Unexpected " + ELEM_VALUE + " element in : " + xmlLocation); |
| } |
| currentProperty.isMultiValue = true; |
| |
| } else if (ELEM_TYPE.equals(qName)) { |
| if ( currentProperty == null ) { |
| throw new IOException("XML file does not seem to contain valid content xml. Unexpected " + ELEM_VALUE + " element in : " + xmlLocation); |
| } |
| currentProperty.type = content; |
| |
| } else if (ELEM_NODE.equals(qName)) { |
| currentNode = NodeDescription.create(currentNode, creator); |
| creator.finishNode(); |
| |
| } else if (ELEM_PRIMARY_NODE_TYPE.equals(qName)) { |
| if ( currentNode == null ) { |
| throw new IOException("Element is not allowed at this location: " + qName + " in " + xmlLocation); |
| } |
| currentNode.primaryNodeType = content; |
| |
| } else if (ELEM_MIXIN_NODE_TYPE.equals(qName)) { |
| if ( currentNode == null ) { |
| throw new IOException("Element is not allowed at this location: " + qName + " in " + xmlLocation); |
| } |
| currentNode.addMixinType(content); |
| } |
| } else if (eventType == XmlPullParser.TEXT || eventType == XmlPullParser.CDSECT) { |
| contentBuffer.append(this.xmlParser.getText()); |
| } |
| |
| eventType = this.xmlParser.nextToken(); |
| } |
| } |
| |
| /** |
| * Takes an XML input stream and pipes it through an XSL transformer. |
| * Callers should call {@link #startTransform} before trying to use the stream, or the caller will wait indefinately for input. |
| */ |
| private static class XslTransformerStream extends PipedInputStream { |
| private InputStream inputXml; |
| private String xslHref; |
| private Thread transformerThread; |
| private PipedOutputStream pipedOut; |
| private URL xmlLocation; |
| |
| /** |
| * Instantiate the XslTransformerStream. |
| * @param inputXml XML to be transformed. |
| * @param xslHref Path to an XSL stylesheet |
| * @param xmlLocation |
| * @throws IOException |
| */ |
| public XslTransformerStream(InputStream inputXml, String xslHref, URL xmlLocation) throws IOException { |
| super(); |
| this.inputXml = inputXml; |
| this.xslHref = xslHref; |
| this.transformerThread = null; |
| this.pipedOut = new PipedOutputStream(this); |
| this.xmlLocation = xmlLocation; |
| } |
| |
| /** |
| * Starts the XSL transformer in a new thread, so that it can pipe its output to our <code>PipedInputStream</code>. |
| * @throws IOException |
| */ |
| public void startTransform() throws IOException { |
| final URL xslResource = new URL(xmlLocation, this.xslHref); |
| |
| /* |
| if (xslResource == null) { |
| throw new IOException("Could not find " + xslHref); |
| } |
| */ |
| |
| transformerThread = new Thread( |
| new Runnable() { |
| public void run() { |
| try { |
| Source xml = new StreamSource(inputXml); |
| Source xsl = new StreamSource(xslResource.toExternalForm()); |
| final StreamResult streamResult; |
| final Templates templates = TransformerFactory.newInstance().newTemplates(xsl); |
| streamResult = new StreamResult(pipedOut); |
| templates.newTransformer().transform(xml, streamResult); |
| } catch (TransformerConfigurationException e) { |
| throw new RuntimeException("Error initializing XSL transformer", e); |
| } catch (TransformerException e) { |
| throw new RuntimeException("Error transforming", e); |
| } finally { |
| closeStream(pipedOut); |
| } |
| } |
| } |
| , "XslTransformerThread"); |
| transformerThread.start(); |
| } |
| |
| |
| } |
| |
| /** |
| * Utility function to close a stream if it is still open. |
| * @param closeable Stream to close |
| */ |
| private static void closeStream(Closeable closeable) { |
| if (closeable != null) { |
| try { |
| closeable.close(); |
| } catch (IOException ignore) { |
| } |
| } |
| } |
| |
| protected static final class NodeDescription { |
| |
| public static NodeDescription SHARED = new NodeDescription(); |
| |
| public String name; |
| public String primaryNodeType; |
| public List<String> mixinTypes; |
| |
| public static NodeDescription create(NodeDescription desc, ContentCreator creator) |
| throws RepositoryException { |
| if ( desc != null ) { |
| creator.createNode(desc.name, desc.primaryNodeType, desc.getMixinTypes()); |
| desc.clear(); |
| } |
| return null; |
| } |
| |
| public void addMixinType(String v) { |
| if ( this.mixinTypes == null ) { |
| this.mixinTypes = new ArrayList<String>(); |
| } |
| this.mixinTypes.add(v); |
| } |
| |
| |
| private String[] getMixinTypes() { |
| if ( this.mixinTypes == null || this.mixinTypes.size() == 0) { |
| return null; |
| } |
| return mixinTypes.toArray(new String[this.mixinTypes.size()]); |
| } |
| |
| private void clear() { |
| this.name = null; |
| this.primaryNodeType = null; |
| if ( this.mixinTypes != null ) { |
| this.mixinTypes.clear(); |
| } |
| } |
| } |
| |
| protected static final class PropertyDescription { |
| |
| public static PropertyDescription SHARED = new PropertyDescription(); |
| |
| public static PropertyDescription create(PropertyDescription desc, ContentCreator creator) |
| throws RepositoryException { |
| int type = (desc.type == null ? PropertyType.STRING : PropertyType.valueFromName(desc.type)); |
| if ( desc.isMultiValue ) { |
| creator.createProperty(desc.name, type, desc.getPropertyValues()); |
| } else { |
| String value = null; |
| if ( desc.values != null && desc.values.size() == 1 ) { |
| value = desc.values.get(0); |
| } |
| creator.createProperty(desc.name, type, value); |
| } |
| desc.clear(); |
| return null; |
| } |
| |
| public String name; |
| public String type; |
| public List<String> values; |
| public boolean isMultiValue; |
| |
| public void addValue(String v) { |
| if ( this.values == null ) { |
| this.values = new ArrayList<String>(); |
| } |
| this.values.add(v); |
| } |
| |
| private String[] getPropertyValues() { |
| if ( this.values == null || this.values.size() == 0) { |
| return null; |
| } |
| return values.toArray(new String[this.values.size()]); |
| } |
| |
| private void clear() { |
| this.name = null; |
| this.type = null; |
| if ( this.values != null ) { |
| this.values.clear(); |
| } |
| this.isMultiValue = false; |
| } |
| } |
| |
| /** |
| * Represents an XML processing instruction.<br /> |
| * A processing instruction like <code><?xml-stylesheet href="stylesheet.xsl" type="text/css"?></code> |
| * will have <code>name</code> == <code>"xml-stylesheet"</code> and two attributes: <code>href</code> and <code>type</code>. |
| */ |
| private static class ProcessingInstruction { |
| |
| private Map<String, String> attributes = new HashMap<String, String>(); |
| private static final Pattern ATTRIBUTE_PATTERN = Pattern.compile("\\s(.[^=\\s]*)\\s?=\\s?\"(.[^\"]*)\""); |
| private static final Pattern NAME_PATTERN = Pattern.compile("^(.[^\\s\\?>]*)"); |
| private String name; |
| |
| public ProcessingInstruction(String text) throws IOException { |
| final Matcher nameMatcher = NAME_PATTERN.matcher(text); |
| if (!nameMatcher.find()) { |
| throw new IOException("Malformed processing instruction: " + text); |
| } |
| |
| this.name = nameMatcher.group(1); |
| final Matcher attributeMatcher = ATTRIBUTE_PATTERN.matcher(text); |
| while (attributeMatcher.find()) { |
| attributes.put(attributeMatcher.group(1), attributeMatcher.group(2)); |
| } |
| } |
| |
| public String getName() { |
| return name; |
| } |
| |
| public String getAttribute(String key) { |
| return this.attributes.get(key); |
| } |
| |
| } |
| |
| /** |
| * Represents a reference to a file that is to be loaded into the repository. The file is referenced by an |
| * XML element named <code><nt:file></code>, with the attributes <code>src</code>, |
| * <code>mimeType</code> and <code>lastModified</code>. <br/><br/>Example: |
| * <pre> |
| * <nt:file src="../../image.png" mimeType="image/png" lastModified="1977-06-01T07:00:00+0100" /> |
| * </pre> |
| * The date format for <code>lastModified</code> is <code>yyyy-MM-dd'T'HH:mm:ssZ</code>. |
| * The <code>lastModified</code> attribute is optional. If missing, the last modified date reported by the |
| * filesystem will be used. |
| */ |
| protected static final class FileDescription { |
| |
| private URL url; |
| private String mimeType; |
| private URL baseLocation; |
| private Long lastModified; |
| |
| public static FileDescription SHARED = new FileDescription(); |
| private static final String SRC_ATTRIBUTE = "src"; |
| private static final String MIME_TYPE_ATTRIBUTE = "mimeType"; |
| private static final String LAST_MODIFIED_ATTRIBUTE = "lastModified"; |
| public static final DateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ"); |
| |
| static { |
| DATE_FORMAT.setLenient(true); |
| } |
| |
| public void setValues(AttributeMap attributes) throws MalformedURLException, ParseException { |
| Set<String> attributeNames = attributes.keySet(); |
| for (String name : attributeNames) { |
| String value = attributes.get(name); |
| if (name.equals(SRC_ATTRIBUTE)) { |
| url = new URL(baseLocation, value); |
| } else if (name.equals(MIME_TYPE_ATTRIBUTE)) { |
| mimeType = value; |
| } else if (name.equals(LAST_MODIFIED_ATTRIBUTE)) { |
| lastModified = DATE_FORMAT.parse(value).getTime(); |
| } |
| } |
| } |
| |
| public void create(ContentCreator creator) throws RepositoryException, IOException { |
| String[] parts = url.getPath().split("/"); |
| String name = parts[parts.length - 1]; |
| InputStream stream = url.openStream(); |
| if (lastModified == null) { |
| try { |
| lastModified = new File(url.toURI()).lastModified(); |
| } catch (Throwable ignore) { |
| // Could not get lastModified from file system, so we'll use current date |
| lastModified = Calendar.getInstance().getTimeInMillis(); |
| } |
| } |
| creator.createFileAndResourceNode(name, stream, mimeType, lastModified); |
| closeStream(stream); |
| creator.finishNode(); |
| creator.finishNode(); |
| this.clear(); |
| } |
| |
| public URL getUrl() { |
| return url; |
| } |
| |
| public String getMimeType() { |
| return mimeType; |
| } |
| |
| public Long getLastModified() { |
| return lastModified; |
| } |
| |
| public void clear() { |
| this.url = null; |
| this.mimeType = null; |
| this.lastModified = null; |
| } |
| |
| public void setBaseLocation(URL xmlLocation) { |
| this.baseLocation = xmlLocation; |
| } |
| } |
| |
| /** |
| * Utility class for dealing with attributes from KXmlParser. |
| */ |
| protected static class AttributeMap extends HashMap<String, String> { |
| |
| private static final long serialVersionUID = -6304058237706001104L; |
| private static final AttributeMap instance = new AttributeMap(); |
| |
| public static AttributeMap getInstance() { |
| return instance; |
| } |
| |
| /** |
| * Puts values in an <code>AttributeMap</code> by extracting attributes from the <code>xmlParser</code>. |
| * @param xmlParser <code>xmlParser</code> to extract attributes from. The parser must be |
| * in {@link org.xmlpull.v1.XmlPullParser#START_TAG} state. |
| */ |
| public void setValues(KXmlParser xmlParser) { |
| final int count = xmlParser.getAttributeCount(); |
| for (int i = 0; i < count; i++) { |
| this.put(xmlParser.getAttributeName(i), xmlParser.getAttributeValue(i)); |
| } |
| } |
| } |
| } |