| /* ==================================================================== |
| Licensed to the Apache Software Foundation (ASF) under one or more |
| contributor license agreements. See the NOTICE file distributed with |
| this work for additional information regarding copyright ownership. |
| The ASF licenses this file to You under the Apache License, Version 2.0 |
| (the "License"); you may not use this file except in compliance with |
| the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, software |
| distributed under the License is distributed on an "AS IS" BASIS, |
| WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| See the License for the specific language governing permissions and |
| limitations under the License. |
| ==================================================================== */ |
| |
| package org.apache.poi.xssf.extractor; |
| |
| import java.io.IOException; |
| import java.io.StringReader; |
| import java.text.DateFormat; |
| import java.text.ParseException; |
| import java.text.SimpleDateFormat; |
| import java.util.Arrays; |
| import java.util.Date; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Set; |
| |
| import javax.xml.namespace.NamespaceContext; |
| import javax.xml.parsers.DocumentBuilder; |
| import javax.xml.xpath.XPath; |
| import javax.xml.xpath.XPathConstants; |
| import javax.xml.xpath.XPathExpressionException; |
| |
| import org.apache.logging.log4j.LogManager; |
| import org.apache.logging.log4j.Logger; |
| import org.apache.poi.ooxml.util.DocumentHelper; |
| import org.apache.poi.ooxml.util.XPathHelper; |
| import org.apache.poi.ss.usermodel.DateUtil; |
| import org.apache.poi.ss.util.CellReference; |
| import org.apache.poi.util.LocaleUtil; |
| import org.apache.poi.xssf.usermodel.XSSFCell; |
| import org.apache.poi.xssf.usermodel.XSSFMap; |
| import org.apache.poi.xssf.usermodel.XSSFRow; |
| import org.apache.poi.xssf.usermodel.XSSFTable; |
| import org.apache.poi.xssf.usermodel.XSSFTableColumn; |
| import org.apache.poi.xssf.usermodel.helpers.XSSFSingleXmlCell; |
| import org.apache.poi.xssf.usermodel.helpers.XSSFXmlColumnPr; |
| import org.w3c.dom.Document; |
| import org.w3c.dom.Element; |
| import org.w3c.dom.NamedNodeMap; |
| import org.w3c.dom.Node; |
| import org.w3c.dom.NodeList; |
| import org.xml.sax.InputSource; |
| import org.xml.sax.SAXException; |
| |
| import static org.apache.logging.log4j.util.Unbox.box; |
| |
| /** |
| * Imports data from an external XML to an XLSX according to one of the mappings |
| * defined.The output XML Schema must respect this limitations: |
| * <ul> |
| * <li>the input XML must be valid according to the XML Schema used in the mapping</li> |
| * <li>denormalized table mapping is not supported (see OpenOffice part 4: chapter 3.5.1.7)</li> |
| * <li>all the namespaces used in the document must be declared in the root node</li> |
| * </ul> |
| */ |
| public class XSSFImportFromXML { |
| |
| private final XSSFMap _map; |
| |
| private static final Logger LOG = LogManager.getLogger(XSSFImportFromXML.class); |
| |
| public XSSFImportFromXML(XSSFMap map) { |
| _map = map; |
| } |
| |
| /** |
| * Imports an XML into the XLSX using the Custom XML mapping defined |
| * |
| * @param xmlInputString the XML to import |
| * @throws SAXException if error occurs during XML parsing |
| * @throws XPathExpressionException if error occurs during XML navigation |
| * @throws IOException if there are problems reading the input string |
| */ |
| public void importFromXML(String xmlInputString) throws SAXException, XPathExpressionException, IOException { |
| |
| DocumentBuilder builder = DocumentHelper.newDocumentBuilder(); |
| |
| Document doc = builder.parse(new InputSource(new StringReader(xmlInputString.trim()))); |
| |
| List<XSSFSingleXmlCell> singleXmlCells = _map.getRelatedSingleXMLCell(); |
| |
| List<XSSFTable> tables = _map.getRelatedTables(); |
| |
| XPath xpath = XPathHelper.getFactory().newXPath(); |
| |
| // Setting namespace context to XPath |
| // Assuming that the namespace prefix in the mapping xpath is the |
| // same as the one used in the document |
| xpath.setNamespaceContext(new DefaultNamespaceContext(doc)); |
| |
| for (XSSFSingleXmlCell singleXmlCell : singleXmlCells) { |
| |
| String xmlDataType = singleXmlCell.getXmlDataType(); |
| String xpathString = singleXmlCell.getXpath(); |
| Node result = (Node) xpath.evaluate(xpathString, doc, XPathConstants.NODE); |
| // result can be null if value is optional (xsd:minOccurs=0), see bugzilla 55864 |
| if (result != null) { |
| String textContent = result.getTextContent(); |
| LOG.atDebug().log("Extracting with xpath {} : value is '{}'", xpathString, textContent); |
| XSSFCell cell = singleXmlCell.getReferencedCell(); |
| LOG.atDebug().log("Setting '{}' to cell {}-{} in sheet {}", textContent, box(cell.getColumnIndex()),box(cell.getRowIndex()),cell.getSheet().getSheetName()); |
| setCellValue(textContent, cell, xmlDataType); |
| } |
| } |
| |
| for (XSSFTable table : tables) { |
| |
| String commonXPath = table.getCommonXpath(); |
| NodeList result = (NodeList) xpath.evaluate(commonXPath, doc, XPathConstants.NODESET); |
| int rowOffset = table.getStartCellReference().getRow() + table.getHeaderRowCount(); |
| int columnOffset = table.getStartCellReference().getCol(); |
| |
| table.setDataRowCount(result.getLength()); |
| |
| for (int i = 0; i < result.getLength(); i++) { |
| |
| // TODO: implement support for denormalized XMLs (see |
| // OpenOffice part 4: chapter 3.5.1.7) |
| |
| Node singleNode = result.item(i).cloneNode(true); |
| |
| for (XSSFTableColumn tableColumn : table.getColumns()) { |
| |
| XSSFXmlColumnPr xmlColumnPr = tableColumn.getXmlColumnPr(); |
| if(xmlColumnPr == null) { |
| continue; |
| } |
| |
| int rowId = rowOffset + i; |
| int columnId = columnOffset + tableColumn.getColumnIndex(); |
| String localXPath = xmlColumnPr.getLocalXPath(); |
| localXPath = localXPath.substring(localXPath.indexOf('/', 1) + 1); |
| |
| // TODO: convert the data to the cell format |
| String value = (String) xpath.evaluate(localXPath, singleNode, XPathConstants.STRING); |
| LOG.atDebug().log("Extracting with xpath {} : value is '{}'", localXPath, value); |
| XSSFRow row = table.getXSSFSheet().getRow(rowId); |
| if (row == null) { |
| row = table.getXSSFSheet().createRow(rowId); |
| } |
| |
| XSSFCell cell = row.getCell(columnId); |
| if (cell == null) { |
| cell = row.createCell(columnId); |
| } |
| LOG.atDebug().log("Setting '{}' to cell {}-{} in sheet {}", value, box(cell.getColumnIndex()),box(cell.getRowIndex()),table.getXSSFSheet().getSheetName()); |
| setCellValue(value, cell, xmlColumnPr.getXmlDataType()); |
| } |
| } |
| } |
| } |
| |
| |
| |
| private enum DataType { |
| BOOLEAN("boolean"), // |
| DOUBLE("double"), // |
| INTEGER("int", "unsignedInt", "integer"), // |
| STRING("string"), // |
| DATE("date"); |
| |
| private Set<String> xmlDataTypes; |
| |
| DataType(String... xmlDataTypes) { |
| this.xmlDataTypes = new HashSet<>(Arrays.asList(xmlDataTypes)); |
| } |
| |
| public static DataType getDataType(String xmlDataType) { |
| for (DataType dataType : DataType.values()) { |
| if (dataType.xmlDataTypes.contains(xmlDataType)) { |
| return dataType; |
| } |
| } |
| return null; |
| } |
| } |
| |
| private void setCellValue(String value, XSSFCell cell, String xmlDataType) { |
| DataType type = DataType.getDataType(xmlDataType); |
| try { |
| if (value.isEmpty() || type == null) { |
| cell.setCellValue((String) null); |
| } else { |
| switch (type) { |
| case BOOLEAN: |
| cell.setCellValue(Boolean.parseBoolean(value)); |
| break; |
| case DOUBLE: |
| cell.setCellValue(Double.parseDouble(value)); |
| break; |
| case INTEGER: |
| cell.setCellValue(Integer.parseInt(value)); |
| break; |
| case DATE: |
| DateFormat sdf = new SimpleDateFormat("yyyy-MM-dd", LocaleUtil.getUserLocale()); |
| Date date = sdf.parse(value); |
| cell.setCellValue(date); |
| if (!DateUtil.isValidExcelDate(cell.getNumericCellValue())) { |
| cell.setCellValue(value); |
| } |
| break; |
| case STRING: |
| default: |
| cell.setCellValue(value.trim()); |
| break; |
| } |
| } |
| } catch (IllegalArgumentException | ParseException e) { |
| throw new IllegalArgumentException(String.format(LocaleUtil.getUserLocale(), "Unable to format value '%s' as %s for cell %s", value, |
| type, new CellReference(cell).formatAsString())); |
| } |
| } |
| |
| private static final class DefaultNamespaceContext implements NamespaceContext { |
| /** |
| * Node from which to start searching for a xmlns attribute that binds a |
| * prefix to a namespace. |
| */ |
| private final Element _docElem; |
| |
| public DefaultNamespaceContext(Document doc) { |
| _docElem = doc.getDocumentElement(); |
| } |
| |
| @Override |
| public String getNamespaceURI(String prefix) { |
| return getNamespaceForPrefix(prefix); |
| } |
| |
| /** |
| * @param prefix Prefix to resolve. |
| * @return uri of Namespace that prefix resolves to, or |
| * <code>null</code> if specified prefix is not bound. |
| */ |
| private String getNamespaceForPrefix(String prefix) { |
| |
| // Code adapted from Xalan's org.apache.xml.utils.PrefixResolverDefault.getNamespaceForPrefix() |
| |
| if (prefix.equals("xml")) { |
| return "http://www.w3.org/XML/1998/namespace"; |
| } |
| |
| Node parent = _docElem; |
| |
| while (parent != null) { |
| |
| int type = parent.getNodeType(); |
| if (type == Node.ELEMENT_NODE) { |
| if (parent.getNodeName().startsWith(prefix + ":")) { |
| return parent.getNamespaceURI(); |
| } |
| NamedNodeMap nnm = parent.getAttributes(); |
| |
| for (int i = 0; i < nnm.getLength(); i++) { |
| Node attr = nnm.item(i); |
| String aname = attr.getNodeName(); |
| boolean isPrefix = aname.startsWith("xmlns:"); |
| |
| if (isPrefix || aname.equals("xmlns")) { |
| int index = aname.indexOf(':'); |
| String p = isPrefix ? aname.substring(index + 1) : ""; |
| |
| if (p.equals(prefix)) { |
| return attr.getNodeValue(); |
| } |
| } |
| } |
| } else if (type == Node.ENTITY_REFERENCE_NODE) { |
| continue; |
| } else { |
| break; |
| } |
| parent = parent.getParentNode(); |
| } |
| |
| return null; |
| } |
| |
| // Dummy implementation - not used! |
| @Override |
| public Iterator<String> getPrefixes(String val) { |
| return null; |
| } |
| |
| // Dummy implementation - not used! |
| @Override |
| public String getPrefix(String uri) { |
| return null; |
| } |
| } |
| |
| } |