// blob: 9b540f9654033e0b8a6639847051c018e9eff32c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.engines.metaxa.core.html;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
/**
* <code>DOMUtils</code> provides convenience methods for working with DOM
* documents.
*
* @author Walter Kasper, DFKI
* @author Joerg Steffen, DFKI
* @version $Id$
*/
public final class DOMUtils {

    /** Xalan-specific output property that controls the indentation width. */
    private static final String INDENT_AMOUNT_KEY =
        "{http://xml.apache.org/xslt}indent-amount";

    /**
     * Prints the given DOM document to <code>System.out</code> with
     * indentation and UTF-8 encoding. Transformer errors are reported on
     * <code>System.err</code> and otherwise ignored.
     *
     * @param doc
     *            a DOM <code>Document</code>
     */
    public static void printXML(Document doc) {
        try {
            Transformer xformer = newIndentingTransformer("UTF-8", null);
            xformer.transform(new DOMSource(doc), new StreamResult(System.out));
        } catch (TransformerException te) {
            // also covers TransformerConfigurationException, which is
            // reported in exactly the same way
            System.err.println(te.getMessage());
            printNestedStackTrace(te);
        }
    }

    /**
     * Returns an indented string representation of the given document.
     *
     * @param doc
     *            an XML <code>Document</code>
     * @param encoding
     *            a <code>String</code> with the encoding to declare in the
     *            XML output
     * @param docTypeDef
     *            a <code>String</code> with the DTD system identifier; use
     *            <code>null</code> for no DTD
     * @return a <code>String</code> with the XML, or <code>null</code> if the
     *         transformation fails (the error is reported on
     *         <code>System.err</code>)
     */
    public static String getStringFromDoc(
            Document doc, String encoding, String docTypeDef) {
        try {
            Transformer xformer = newIndentingTransformer(encoding, docTypeDef);
            StringWriter sw = new StringWriter();
            xformer.transform(new DOMSource(doc), new StreamResult(sw));
            return sw.toString();
        } catch (TransformerConfigurationException tce) {
            // the transformer could not be created
            System.err.println("** Transformer Factory error");
            System.err.println(" " + tce.getMessage());
            printNestedStackTrace(tce);
        } catch (TransformerException te) {
            // the transformation itself failed
            System.err.println("** Transformation error");
            System.err.println(" " + te.getMessage());
            printNestedStackTrace(te);
        }
        return null;
    }

    /**
     * Writes a DOM document, indented, to the given output stream. The stream
     * is not closed by this method. Transformer errors are reported on
     * <code>System.err</code> and otherwise ignored.
     *
     * @param doc
     *            a DOM <code>Document</code>
     * @param encoding
     *            a <code>String</code> with the encoding to use
     * @param docTypeDef
     *            a <code>String</code> with the DTD system identifier; use
     *            <code>null</code> for no DTD
     * @param out
     *            an <code>OutputStream</code> where to write the DOM document
     */
    public static void writeXml(
            Document doc, String encoding, String docTypeDef, OutputStream out) {
        try {
            Transformer xformer = newIndentingTransformer(encoding, docTypeDef);
            xformer.transform(new DOMSource(doc), new StreamResult(out));
        } catch (TransformerException te) {
            // also covers TransformerConfigurationException, which is
            // reported in exactly the same way
            System.err.println(te.getMessage());
            printNestedStackTrace(te);
        }
    }

    /**
     * Parses the given XML string and creates a DOM Document. The parser is
     * namespace aware and non-validating.
     *
     * @param xml
     *            a <code>String</code> with the XML content to parse
     * @param encoding
     *            a <code>String</code> denoting the encoding used to turn the
     *            string into bytes for the parser; <code>null</code> means
     *            UTF-8
     * @return Document a DOM <code>Document</code>, <code>null</code> if
     *         parsing fails
     */
    public static Document parse(String xml, String encoding) {
        if (encoding == null) {
            encoding = "UTF-8";
        }
        // NOTE(review): the parser keeps the JAXP default handling of DTDs and
        // external entities; confirm that callers only pass trusted XML.
        DocumentBuilderFactory factory = newBuilderFactory(false);
        Document document = null;
        try {
            DocumentBuilder builder = factory.newDocumentBuilder();
            document =
                builder.parse(new ByteArrayInputStream(xml.getBytes(encoding)));
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        } catch (ParserConfigurationException e) {
            e.printStackTrace();
        } catch (SAXException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return document;
    }

    /**
     * Parses the given XML file and creates a DOM Document. The parser is
     * namespace aware.
     *
     * @param fileName
     *            a <code>String</code> with the source file name
     * @param validation
     *            a <code>boolean</code> indicating if the parsing uses DTD
     *            validation
     * @return Document a DOM <code>Document</code>, <code>null</code> if
     *         parsing fails (the error is reported on <code>System.err</code>)
     */
    public static Document parse(String fileName, boolean validation) {
        DocumentBuilderFactory factory = newBuilderFactory(validation);
        try {
            DocumentBuilder builder = factory.newDocumentBuilder();
            return builder.parse(new File(fileName));
        } catch (SAXParseException spe) {
            // error generated by the parser; report its location first
            System.err.println(
                "Parsing error, line " + spe.getLineNumber() + ", uri "
                    + spe.getSystemId());
            System.err.println(" " + spe.getMessage());
            printNestedStackTrace(spe);
        } catch (SAXException sxe) {
            // error generated during parsing
            System.err.println(sxe.getMessage());
            printNestedStackTrace(sxe);
        } catch (ParserConfigurationException pce) {
            // parser with specified options can't be built
            System.err.println(pce.getMessage());
            pce.printStackTrace();
        } catch (IOException ioe) {
            // i/o error
            System.err.println(ioe.getMessage());
            ioe.printStackTrace();
        }
        return null;
    }

    /**
     * Counts the elements in the given document by tag name.
     *
     * @param tag
     *            a <code>String</code> with a tag name
     * @param doc
     *            a DOM <code>Document</code>
     * @return an <code>int</code> with the number of elements having that tag
     *         name
     */
    public static int countByTagName(String tag, Document doc) {
        return doc.getElementsByTagName(tag).getLength();
    }

    /**
     * Realizes the <code>indexOf</code> method of the
     * <code>java.util.List</code> interface for <code>NodeList</code>. Nodes
     * are compared by identity, not by <code>equals</code>.
     *
     * @param list
     *            a <code>NodeList</code> value
     * @param node
     *            a <code>Node</code> value
     * @return an <code>int</code> value, giving the position of
     *         <code>node</code> in <code>list</code> or -1, if node is not
     *         contained in the list
     */
    public static int indexOf(NodeList list, Node node) {
        for (int i = 0, length = list.getLength(); i < length; ++i) {
            if (list.item(i) == node) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Returns the text of the given node. For a text, CDATA or attribute
     * node its value is returned. For an element node the values of all text
     * and CDATA nodes which are direct children are concatenated (text of
     * nested elements is not included). For any other node type
     * <code>null</code> is returned.
     *
     * @param node
     *            a <code>Node</code> value
     * @return a <code>String</code> with the text, or <code>null</code> for
     *         unsupported node types
     */
    public static String getText(Node node) {
        short nodeType = node.getNodeType();
        if (nodeType == Node.TEXT_NODE
            || nodeType == Node.ATTRIBUTE_NODE
            || nodeType == Node.CDATA_SECTION_NODE) {
            return node.getNodeValue();
        }
        if (nodeType == Node.ELEMENT_NODE) {
            NodeList children = node.getChildNodes();
            StringBuilder text = new StringBuilder();
            for (int i = 0, length = children.getLength(); i < length; ++i) {
                Node child = children.item(i);
                short childType = child.getNodeType();
                if (childType == Node.TEXT_NODE
                    || childType == Node.CDATA_SECTION_NODE) {
                    text.append(child.getNodeValue());
                }
            }
            return text.toString();
        }
        return null;
    }

    /**
     * Selects all direct children of the given element with the given node
     * name. If the name is <code>null</code>, all children are returned. The
     * result is not restricted to element nodes: any child whose node name
     * matches is included.
     *
     * @param ele
     *            an <code>Element</code> value
     * @param name
     *            a <code>String</code> with the children's name
     * @return a <code>List</code> of <code>Node</code>s with the children
     */
    public static List<org.w3c.dom.Node> getChildren(
            Element ele, String name) {
        NodeList children = ele.getChildNodes();
        List<org.w3c.dom.Node> matches = new ArrayList<org.w3c.dom.Node>();
        for (int i = 0, length = children.getLength(); i < length; ++i) {
            Node child = children.item(i);
            if (name == null || child.getNodeName().equals(name)) {
                matches.add(child);
            }
        }
        return matches;
    }

    /**
     * Selects all direct children of type 'Element' of the given element.
     *
     * @param ele
     *            an <code>Element</code> value
     * @return a <code>List</code> of <code>Element</code>s with the element
     *         children
     */
    public static List<Element> getChildrenElements(Element ele) {
        NodeList children = ele.getChildNodes();
        List<Element> elements = new ArrayList<Element>();
        for (int i = 0, length = children.getLength(); i < length; ++i) {
            Node child = children.item(i);
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                elements.add((Element) child);
            }
        }
        return elements;
    }

    /**
     * Returns the first child element with the given name found at the given
     * element.
     *
     * @param ele
     *            an <code>Element</code> value
     * @param name
     *            a <code>String</code> with the name of the child element
     * @return a <code>Element</code> with the child or <code>null</code> if no
     *         such child was found
     */
    public static Element getFirstChild(Element ele, String name) {
        NodeList children = ele.getChildNodes();
        for (int i = 0, length = children.getLength(); i < length; ++i) {
            Node child = children.item(i);
            // Non-element children can carry a matching node name as well
            // (a processing instruction's name is its target, text nodes are
            // called "#text"); they must be skipped, not cast to Element.
            if (child.getNodeType() == Node.ELEMENT_NODE
                && child.getNodeName().equals(name)) {
                return (Element) child;
            }
        }
        return null;
    }

    /**
     * Adds a new child with the given name to the given element. The child is
     * created by the element's owner document and appended as last child.
     *
     * @param ele
     *            an <code>Element</code>
     * @param name
     *            a <code>String</code> with the name of the child
     * @return a <code>Element</code> with the newly created child
     */
    public static Element addChild(Element ele, String name) {
        Element child = ele.getOwnerDocument().createElement(name);
        ele.appendChild(child);
        return child;
    }

    /**
     * Creates a transformer that writes indented XML (indent amount 2) in the
     * given encoding, optionally with a DOCTYPE system identifier.
     */
    private static Transformer newIndentingTransformer(
            String encoding, String docTypeDef)
            throws TransformerConfigurationException {
        Transformer xformer =
            TransformerFactory.newInstance().newTransformer();
        xformer.setOutputProperty(OutputKeys.INDENT, "yes");
        xformer.setOutputProperty(OutputKeys.ENCODING, encoding);
        xformer.setOutputProperty(INDENT_AMOUNT_KEY, "2");
        xformer.setOutputProperty(OutputKeys.METHOD, "xml");
        if (null != docTypeDef) {
            xformer.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, docTypeDef);
        }
        return xformer;
    }

    /**
     * Creates a namespace-aware document builder factory with the given
     * validation setting.
     */
    private static DocumentBuilderFactory newBuilderFactory(boolean validation) {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setValidating(validation);
        factory.setNamespaceAware(true);
        factory.setIgnoringElementContentWhitespace(true);
        return factory;
    }

    /**
     * Prints the stack trace of the exception wrapped by the given
     * transformer exception, or of the exception itself if it wraps nothing.
     */
    private static void printNestedStackTrace(TransformerException e) {
        Throwable nested = e.getException();
        if (nested == null) {
            nested = e;
        }
        nested.printStackTrace();
    }

    /**
     * Prints the stack trace of the exception wrapped by the given SAX
     * exception, or of the exception itself if it wraps nothing.
     */
    private static void printNestedStackTrace(SAXException e) {
        Exception nested = e.getException();
        if (nested == null) {
            nested = e;
        }
        nested.printStackTrace();
    }
}