| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.stanbol.enhancer.engines.metaxa.core.html; |
| |
| |
| import java.io.ByteArrayInputStream; |
| import java.io.File; |
| import java.io.IOException; |
| import java.io.OutputStream; |
| import java.io.StringWriter; |
| import java.io.UnsupportedEncodingException; |
| import java.util.ArrayList; |
| import java.util.List; |
| |
| import javax.xml.parsers.DocumentBuilder; |
| import javax.xml.parsers.DocumentBuilderFactory; |
| import javax.xml.parsers.ParserConfigurationException; |
| import javax.xml.transform.OutputKeys; |
| import javax.xml.transform.Result; |
| import javax.xml.transform.Source; |
| import javax.xml.transform.Transformer; |
| import javax.xml.transform.TransformerConfigurationException; |
| import javax.xml.transform.TransformerException; |
| import javax.xml.transform.TransformerFactory; |
| import javax.xml.transform.dom.DOMSource; |
| import javax.xml.transform.stream.StreamResult; |
| |
| import org.w3c.dom.Document; |
| import org.w3c.dom.Element; |
| import org.w3c.dom.NamedNodeMap; |
| import org.w3c.dom.Node; |
| import org.w3c.dom.NodeList; |
| import org.xml.sax.SAXException; |
| import org.xml.sax.SAXParseException; |
| |
/**
 * <code>DOMUtils</code> provides convenience methods for working with DOM
 * documents: serializing them, parsing them from a string or file, and
 * simple access to child nodes and text content.
 * <p>
 * Parsing and serialization failures are not propagated as checked
 * exceptions. They are reported on <code>System.err</code> and the affected
 * method returns <code>null</code> (or, for <code>void</code> methods, simply
 * returns).
 *
 * @author Walter Kasper, DFKI
 * @author Joerg Steffen, DFKI
 * @version $Id$
 */
public final class DOMUtils {

    /** Encoding used whenever a caller passes <code>null</code>. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /**
     * This prints the given DOM document to System.out with indentation and
     * utf-8 encoding.
     *
     * @param doc
     *            a DOM <code>Document</code>
     */
    public static void printXML(Document doc) {

        // same serialization settings and error reporting as writeXml,
        // with a fixed encoding, no DTD and System.out as target
        writeXml(doc, DEFAULT_ENCODING, null, System.out);
    }


    /**
     * This returns a string representation of the given document.
     *
     * @param doc
     *            an XML <code>Document</code>
     * @param encoding
     *            a <code>String</code> with the encoding to use;
     *            <code>null</code> selects UTF-8
     * @param docTypeDef
     *            a <code>String</code> with the DTD name; use <code>null</code>
     *            for no DTD
     * @return a <code>String</code> with the XML string, or <code>null</code>
     *         if the transformation failed
     */
    public static String getStringFromDoc(
            Document doc, String encoding, String docTypeDef) {

        try {
            Transformer xformer = newTransformer(encoding, docTypeDef);
            StringWriter sw = new StringWriter();
            xformer.transform(new DOMSource(doc), new StreamResult(sw));
            return sw.toString();

        } catch (TransformerConfigurationException tce) {
            // the transformer could not be created
            System.err.println("** Transformer Factory error");
            report(" " + tce.getMessage(), tce);

        } catch (TransformerException te) {
            // the transformation itself failed
            System.err.println("** Transformation error");
            report(" " + te.getMessage(), te);
        }

        return null;
    }

    /**
     * This method writes a DOM document to the given output stream. Errors
     * are reported on <code>System.err</code>; the stream is neither flushed
     * nor closed by this method.
     *
     * @param doc
     *            a DOM <code>Document</code>
     * @param encoding
     *            a <code>String</code> with the encoding to use;
     *            <code>null</code> selects UTF-8
     * @param docTypeDef
     *            a <code>String</code> with the DTD name; use <code>null</code>
     *            for no DTD
     * @param out
     *            an <code>OutputStream</code> where to write the DOM document
     */
    public static void writeXml(
            Document doc, String encoding, String docTypeDef, OutputStream out) {

        try {
            Transformer xformer = newTransformer(encoding, docTypeDef);
            xformer.transform(new DOMSource(doc), new StreamResult(out));

        } catch (TransformerException te) {
            // TransformerConfigurationException is a subclass and is
            // reported the same way, so one handler covers both
            report(te.getMessage(), te);
        }
    }


    /**
     * This parses the given XML string and creates a DOM Document.
     * <p>
     * The string is handed to the parser as characters, so an
     * <code>encoding</code> pseudo-attribute in the XML declaration of
     * <code>xml</code> does not influence how the content is decoded.
     *
     * @param xml
     *            a <code>String</code> with the XML content; must not be
     *            <code>null</code>
     * @param encoding
     *            a <code>String</code> denoting the encoding of the XML string;
     *            <code>null</code> selects UTF-8. Characters that cannot be
     *            represented in this encoding are not preserved.
     * @return Document a DOM <code>Document</code>, <code>null</code> if
     *         parsing fails or the encoding is not supported
     */
    public static Document parse(String xml, String encoding) {

        String charsetName = (encoding == null) ? DEFAULT_ENCODING : encoding;

        // A leading byte order mark is tolerated by parsers in a byte stream
        // but would be seen as content before the prolog in a character
        // stream, so drop it here.
        String text = xml;
        if (text.length() > 0 && text.charAt(0) == '\uFEFF') {
            text = text.substring(1);
        }

        try {
            // non-validating, namespace aware parser
            DocumentBuilder builder =
                newBuilderFactory(false).newDocumentBuilder();

            // Give the parser characters instead of raw bytes. With a byte
            // stream the parser auto-detects the encoding (UTF-8 unless the
            // document declares otherwise), which mis-decodes or rejects
            // bytes that were produced with a different encoding. The round
            // trip through 'charsetName' is kept so that an unsupported
            // encoding name is still reported as a failure.
            byte[] bytes = text.getBytes(charsetName);
            org.xml.sax.InputSource input = new org.xml.sax.InputSource(
                new java.io.InputStreamReader(
                    new ByteArrayInputStream(bytes), charsetName));
            return builder.parse(input);

        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            // includes UnsupportedEncodingException for an unknown encoding
            e.printStackTrace();
        }
        return null;
    }

    /**
     * This parses the given XML file and creates a DOM Document.
     *
     * @param fileName
     *            a <code>String</code> with the source file name
     * @param validation
     *            a <code>boolean</code> indicating if the parsing uses DTD
     *            validation
     * @return Document a DOM <code>Document</code>, <code>null</code> if
     *         parsing fails
     */
    public static Document parse(String fileName, boolean validation) {

        try {
            DocumentBuilder builder =
                newBuilderFactory(validation).newDocumentBuilder();
            // parse and load into memory the Document
            return builder.parse(new File(fileName));

        } catch (SAXParseException spe) {
            // error generated by the parser
            System.err.println(
                "Parsing error, line " + spe.getLineNumber() + ", uri "
                    + spe.getSystemId());
            System.err.println(" " + spe.getMessage());
            // use the contained exception, if any
            Exception x = spe;
            if (spe.getException() != null) {
                x = spe.getException();
            }
            x.printStackTrace();
        } catch (SAXException sxe) {
            // error generated during parsing
            System.err.println(sxe.getMessage());
            // use the contained exception, if any
            Exception x = sxe;
            if (sxe.getException() != null) {
                x = sxe.getException();
            }
            x.printStackTrace();
        } catch (ParserConfigurationException pce) {
            // parser with specified options can't be built
            System.err.println(pce.getMessage());
            pce.printStackTrace();
        } catch (IOException ioe) {
            // i/o error
            System.err.println(ioe.getMessage());
            ioe.printStackTrace();
        }

        return null;
    }

    /**
     * This counts the elements in the given document by tag name.
     *
     * @param tag
     *            a <code>String</code> with a tag name
     * @param doc
     *            a DOM <code>Document</code>
     * @return number an <code>int</code> with the number of elements by tag
     *         name
     */
    public static int countByTagName(String tag, Document doc) {

        NodeList list = doc.getElementsByTagName(tag);
        return list.getLength();
    }

    /**
     * This realizes the <code>indexOf</code> method of the
     * <code>java.util.List</code> interface for <code>NodeList</code>.
     * Nodes are compared by object identity.
     *
     * @param list
     *            a <code>NodeList</code> value
     * @param node
     *            a <code>Node</code> value
     * @return an <code>int</code> value, giving the position of
     *         <code>node</code> in <code>list</code> or -1, if node is not
     *         contained in the list
     */
    public static int indexOf(NodeList list, Node node) {

        for (int i = 0, j = list.getLength(); i < j; ++i) {
            if (list.item(i) == node) {
                return i;
            }
        }
        return -1;
    }

    /**
     * This concatenates the string values of all text and CDATA nodes which
     * are direct children of the given node, if it is an element. If
     * <code>node</code> is a text, CDATA or attribute node, its value is
     * returned. For every other node type <code>null</code> is returned.
     *
     * @param node
     *            a <code>Node</code> value
     * @return a <code>String</code> with the concatenated text, or
     *         <code>null</code> for unsupported node types
     */
    public static String getText(Node node) {

        short nodeType = node.getNodeType();
        if ((nodeType == Node.TEXT_NODE) || (nodeType == Node.ATTRIBUTE_NODE)
            || (nodeType == Node.CDATA_SECTION_NODE)) {
            return node.getNodeValue();
        }
        if (nodeType == Node.ELEMENT_NODE) {
            NodeList dtrs = node.getChildNodes();
            StringBuilder sb = new StringBuilder();
            for (int i = 0, j = dtrs.getLength(); i < j; ++i) {
                Node item = dtrs.item(i);
                if (item.getNodeType() == Node.TEXT_NODE
                    || item.getNodeType() == Node.CDATA_SECTION_NODE) {
                    sb.append(item.getNodeValue());
                }
            }
            return sb.toString();
        }
        return null;
    }

    /**
     * This selects all direct children of the given element with the given
     * node name. If the name is <code>null</code>, all children are returned.
     * The result is not restricted to elements; any child node whose node
     * name matches is included.
     *
     * @param ele
     *            an <code>Element</code> value
     * @param name
     *            a <code>String</code> with the children's name
     * @return a <code>List</code> of <code>Node</code>s with the children
     */
    public static List<org.w3c.dom.Node> getChildren(
            Element ele, String name) {

        NodeList dtrs = ele.getChildNodes();
        List<org.w3c.dom.Node> eles = new ArrayList<org.w3c.dom.Node>();
        for (int i = 0, j = dtrs.getLength(); i < j; ++i) {
            org.w3c.dom.Node item = dtrs.item(i);
            if (name == null || item.getNodeName().equals(name)) {
                eles.add(item);
            }
        }

        return eles;
    }

    /**
     * This selects all direct children of type 'Element' of the given element.
     *
     * @param ele
     *            an <code>Element</code> value
     * @return a <code>List</code> of <code>Element</code>s with the element
     *         children
     */
    public static List<Element> getChildrenElements(Element ele) {

        NodeList dtrs = ele.getChildNodes();
        List<Element> eles = new ArrayList<Element>();
        for (int i = 0, j = dtrs.getLength(); i < j; ++i) {
            org.w3c.dom.Node item = dtrs.item(i);
            if (item.getNodeType() == Node.ELEMENT_NODE) {
                eles.add((Element)item);
            }
        }

        return eles;
    }

    /**
     * This returns the first child element with the given name found at the
     * given element. Children that are not elements are skipped, even if
     * their node name equals <code>name</code> (for example a processing
     * instruction with that target).
     *
     * @param ele
     *            an <code>Element</code> value
     * @param name
     *            a <code>String</code> with the name of the child element
     * @return a <code>Element</code> with the child or <code>null</code> if no
     *         such child was found
     */
    public static Element getFirstChild(Element ele, String name) {

        NodeList dtrs = ele.getChildNodes();
        for (int i = 0, iMax = dtrs.getLength(); i < iMax; ++i) {
            org.w3c.dom.Node item = dtrs.item(i);
            // check the node type first: only elements may be cast below
            if (item.getNodeType() == Node.ELEMENT_NODE
                && item.getNodeName().equals(name)) {
                return (Element)item;
            }
        }
        return null;
    }

    /**
     * This adds a new child with the given name to the given element. The
     * child is created by the owner document of <code>ele</code> and appended
     * after any existing children.
     *
     * @param ele
     *            an <code>Element</code>
     * @param name
     *            a <code>String</code> with the name of the child
     * @return a <code>Element</code> with the newly created child
     */
    public static Element addChild(Element ele, String name) {

        Element child = ele.getOwnerDocument().createElement(name);
        ele.appendChild(child);
        return child;
    }

    /**
     * Creates a transformer that writes indented XML (indent amount 2) in the
     * given encoding, optionally with a system DOCTYPE.
     *
     * @param encoding
     *            the output encoding; <code>null</code> selects UTF-8
     * @param docTypeDef
     *            the DTD system identifier, or <code>null</code> for none
     * @return the configured <code>Transformer</code>
     * @throws TransformerConfigurationException
     *             if no transformer can be created
     */
    private static Transformer newTransformer(String encoding, String docTypeDef)
            throws TransformerConfigurationException {

        Transformer xformer =
            TransformerFactory.newInstance().newTransformer();
        xformer.setOutputProperty(OutputKeys.INDENT, "yes");
        xformer.setOutputProperty(
            OutputKeys.ENCODING,
            (encoding == null) ? DEFAULT_ENCODING : encoding);
        xformer.setOutputProperty(
            "{http://xml.apache.org/xslt}indent-amount", "2");
        xformer.setOutputProperty(OutputKeys.METHOD, "xml");
        if (null != docTypeDef) {
            xformer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, docTypeDef);
        }
        return xformer;
    }

    /**
     * Prints the given message to <code>System.err</code>, followed by the
     * stack trace of the exception wrapped by <code>e</code>, or of
     * <code>e</code> itself if it wraps none.
     *
     * @param message
     *            the line to print before the stack trace
     * @param e
     *            the transformer exception to report
     */
    private static void report(String message, TransformerException e) {

        System.err.println(message);
        // use the contained exception, if any
        Throwable x = e;
        if (e.getException() != null) {
            x = e.getException();
        }
        x.printStackTrace();
    }

    /**
     * Creates a namespace aware parser factory that ignores element content
     * whitespace.
     *
     * @param validation
     *            <code>true</code> to request a validating parser
     * @return the configured <code>DocumentBuilderFactory</code>
     */
    private static DocumentBuilderFactory newBuilderFactory(boolean validation) {

        // NOTE(review): DTD and external entity processing are left at the
        // parser defaults because validating callers need the DTD. If
        // untrusted XML can reach this class, consider restricting them.
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setValidating(validation);
        factory.setNamespaceAware(true);
        factory.setIgnoringElementContentWhitespace(true);
        return factory;
    }

}