blob: fa026d9b8bfd9cafd8764b6dee849f2ba57373cb [file] [log] [blame]
/*
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
*/
package org.apache.wiki.htmltowiki;
import org.apache.wiki.api.core.Context;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.input.SAXBuilder;
import org.jdom2.input.sax.XMLReaderSAX2Factory;
import org.jdom2.output.XMLOutputter;
import java.io.IOException;
import java.io.StringReader;
/**
* Converting Html to Wiki Markup with NekoHtml for converting html to xhtml and
* Xhtml2WikiTranslator for converting xhtml to Wiki Markup.
*
*/
public class HtmlStringToWikiTranslator
{
private static final String CYBERNEKO_PARSER = "org.cyberneko.html.parsers.SAXParser";
/**
* Create a new translator.
*/
public HtmlStringToWikiTranslator()
{}
/**
* Translates text from HTML into WikiMarkup without a WikiContext (meaning
* some things perhaps cannot be translated). Uses the default configuration.
*
* @param html HTML text to translate
* @return WikiMarkup
*
* @throws JDOMException If parsing fails
* @throws IOException For other kinds of errors.
*/
public String translate( final String html ) throws JDOMException, IOException
{
return translate( html, new XHtmlToWikiConfig() );
}
/**
* Translates text from HTML into WikiMarkup with a WikiContext. The translation
* accuracy is better. Uses the default configuration.
*
* @param html HTML text to translate
* @param wikiContext The WikiContext to use.
* @return WikiMarkup
*
* @throws JDOMException If parsing fails
* @throws IOException For other kinds of errors.
*/
public String translate( final String html, final Context wikiContext ) throws JDOMException, IOException
{
return translate( html, new XHtmlToWikiConfig( wikiContext ) );
}
/**
* Translates text from HTML into WikiMarkup using a specified configuration.
*
* @param html HTML text to translate
* @param config The configuration to use.
* @return WikiMarkup
*
* @throws JDOMException If parsing fails
* @throws IOException For other kinds of errors.
*/
public String translate( final String html, final XHtmlToWikiConfig config ) throws JDOMException, IOException
{
final Element element = htmlStringToElement( html );
final XHtmlElementToWikiTranslator xhtmlTranslator = new XHtmlElementToWikiTranslator( element, config );
final String wikiMarkup = xhtmlTranslator.getWikiString();
return wikiMarkup;
}
/**
* Use NekoHtml to parse HTML like well formed XHTML
*
* @param html
* @return xhtml jdom root element (node "HTML")
* @throws JDOMException
* @throws IOException
*/
private Element htmlStringToElement( final String html ) throws JDOMException, IOException
{
final SAXBuilder builder = new SAXBuilder( new XMLReaderSAX2Factory( true, CYBERNEKO_PARSER), null, null );
final Document doc = builder.build( new StringReader( html ) );
final Element element = doc.getRootElement();
return element;
}
/**
* A static helper method to create HTML from an Element.
*
* @param element The element to get HTML from.
* @return HTML
*/
public static String element2String( final Element element )
{
final Document document = new Document( element );
final XMLOutputter outputter = new XMLOutputter();
return outputter.outputString( document );
}
}