blob: 20019f4cb9f09d55841faaad8eb1bf0aedf1cacb [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.cocoon.portal.util;
import java.io.IOException;
import java.io.StringReader;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.Properties;
import org.apache.cocoon.xml.ContentHandlerWrapper;
import org.apache.excalibur.xml.sax.XMLConsumer;
import org.apache.xerces.parsers.AbstractSAXParser;
import org.cyberneko.html.HTMLConfiguration;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.ext.LexicalHandler;
/**
 * SAX parser that reads HTML markup through a NekoHTML configuration and
 * emits the result as a stream of SAX events.
 *
 * @version $Id$
 */
public class HtmlSaxParser extends AbstractSAXParser {

    /** NekoHTML property that is preset to "lower" for every parser instance. */
    private static final String ELEMENT_NAMES_PROPERTY =
        "http://cyberneko.org/html/properties/names/elems";

    /**
     * Create a parser backed by a NekoHTML configuration.
     *
     * @param properties Additional configuration properties, or
     *                   <code>null</code> to use the preset configuration only.
     */
    public HtmlSaxParser(Properties properties) {
        super(getConfig(properties));
    }

    /**
     * Build the NekoHTML configuration used by this parser.
     * The element names property is preset to "lower"; the supplied
     * properties are applied afterwards and can therefore override it.
     *
     * @param properties Properties to copy into the configuration, may be
     *                   <code>null</code>. All keys must be strings.
     * @return The new configuration.
     */
    protected static HTMLConfiguration getConfig(Properties properties) {
        HTMLConfiguration config = new HTMLConfiguration();
        config.setProperty(ELEMENT_NAMES_PROPERTY, "lower");
        if (properties != null) {
            // propertyNames() also reports keys that only exist in the defaults
            // chain of the Properties object; keySet() would silently skip them
            // although getProperty() is able to resolve their values.
            for (Enumeration names = properties.propertyNames(); names.hasMoreElements();) {
                String name = (String) names.nextElement();
                config.setProperty(name, properties.getProperty(name));
            }
        }
        return config;
    }

    /**
     * Parse html stored in the string and send the events to the handler.
     * If the handler also implements {@link LexicalHandler} it receives the
     * lexical events as well.
     *
     * @param content The html markup to parse, must not be <code>null</code>.
     * @param ch      The receiver of the SAX events.
     * @throws SAXException If parsing fails; an {@link IOException} raised
     *                      while reading is wrapped into a SAXException.
     */
    public static void parseString(String content, ContentHandler ch)
    throws SAXException {
        final HtmlSaxParser parser = new HtmlSaxParser(null);
        parser.setContentHandler(ch);
        if ( ch instanceof LexicalHandler ) {
            parser.setLexicalHandler((LexicalHandler)ch);
        }
        final InputSource is = new InputSource(new StringReader(content));
        try {
            parser.parse(is);
        } catch (IOException ioe) {
            throw new SAXException("Error during parsing of html markup.", ioe);
        }
    }

    /**
     * Wrap a content handler into a filter that drops the start and end
     * events of <code>html</code> and <code>body</code> elements and forwards
     * everything else.
     *
     * @param ch The handler receiving the filtered events.
     * @return The filtering consumer.
     */
    public static XMLConsumer getContentFilter(ContentHandler ch) {
        return new ContentFilter(ch);
    }

    /**
     * Filter removing the <code>html</code> and <code>body</code> element
     * events from a SAX stream. The content of these elements is passed on.
     */
    protected static final class ContentFilter extends ContentHandlerWrapper {

        /**
         * @param ch The handler receiving the filtered events; it is also
         *           registered as lexical handler if it implements
         *           {@link LexicalHandler}.
         */
        public ContentFilter(ContentHandler ch) {
            this.setContentHandler(ch);
            if ( ch instanceof LexicalHandler ) {
                this.setLexicalHandler((LexicalHandler)ch);
            }
        }

        /**
         * Decide whether an element event has to be suppressed.
         * The local name is used if available; a source that does not perform
         * namespace processing reports an empty local name, in that case the
         * qualified name is checked instead.
         */
        private static boolean isWrapperElement(String loc, String raw) {
            final String name = (loc == null || loc.length() == 0) ? raw : loc;
            // constant-first comparison keeps the check safe for null names
            return "html".equals(name) || "body".equals(name);
        }

        /**
         * @see org.xml.sax.ContentHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
         */
        public void endElement(String uri, String loc, String raw) throws SAXException {
            if ( !isWrapperElement(loc, raw) ) {
                super.endElement(uri, loc, raw);
            }
        }

        /**
         * @see org.xml.sax.ContentHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
         */
        public void startElement(String uri, String loc, String raw, Attributes a) throws SAXException {
            if ( !isWrapperElement(loc, raw) ) {
                super.startElement(uri, loc, raw, a);
            }
        }
    }
}