| /* |
| Licensed to the Apache Software Foundation (ASF) under one |
| or more contributor license agreements. See the NOTICE file |
| distributed with this work for additional information |
| regarding copyright ownership. The ASF licenses this file |
| to you under the Apache License, Version 2.0 (the |
| "License"); you may not use this file except in compliance |
| with the License. You may obtain a copy of the License at |
| |
| http://www.apache.org/licenses/LICENSE-2.0 |
| |
| Unless required by applicable law or agreed to in writing, |
| software distributed under the License is distributed on an |
| "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| KIND, either express or implied. See the License for the |
| specific language governing permissions and limitations |
| under the License. |
| */ |
| package org.apache.wiki.parser; |
| |
| import org.apache.commons.lang3.StringUtils; |
| import org.apache.commons.text.StringEscapeUtils; |
| import org.apache.log4j.Logger; |
| import org.apache.oro.text.regex.MalformedPatternException; |
| import org.apache.oro.text.regex.MatchResult; |
| import org.apache.oro.text.regex.Pattern; |
| import org.apache.oro.text.regex.PatternCompiler; |
| import org.apache.oro.text.regex.PatternMatcher; |
| import org.apache.oro.text.regex.Perl5Compiler; |
| import org.apache.oro.text.regex.Perl5Matcher; |
| import org.apache.wiki.InternalWikiException; |
| import org.apache.wiki.StringTransmutator; |
| import org.apache.wiki.WikiContext; |
| import org.apache.wiki.WikiPage; |
| import org.apache.wiki.api.exceptions.PluginException; |
| import org.apache.wiki.api.plugin.WikiPlugin; |
| import org.apache.wiki.attachment.AttachmentManager; |
| import org.apache.wiki.auth.AuthorizationManager; |
| import org.apache.wiki.auth.UserManager; |
| import org.apache.wiki.auth.WikiSecurityException; |
| import org.apache.wiki.auth.acl.Acl; |
| import org.apache.wiki.auth.acl.AclManager; |
| import org.apache.wiki.i18n.InternationalizationManager; |
| import org.apache.wiki.preferences.Preferences; |
| import org.apache.wiki.util.TextUtil; |
| import org.apache.wiki.util.XmlUtil; |
| import org.apache.wiki.variables.VariableManager; |
| import org.jdom2.Attribute; |
| import org.jdom2.Content; |
| import org.jdom2.Element; |
| import org.jdom2.IllegalDataException; |
| import org.jdom2.ProcessingInstruction; |
| import org.jdom2.Verifier; |
| |
| import javax.xml.transform.Result; |
| import java.io.IOException; |
| import java.io.Reader; |
| import java.io.StringReader; |
| import java.text.MessageFormat; |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collection; |
| import java.util.EmptyStackException; |
| import java.util.HashMap; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Map; |
| import java.util.Properties; |
| import java.util.ResourceBundle; |
| import java.util.Stack; |
| |
| /** |
| * Parses JSPWiki-style markup into a WikiDocument DOM tree. This class is the |
| * heart and soul of JSPWiki : make sure you test properly anything that is added, |
| * or else it breaks down horribly. |
| * |
| * @since 2.4 |
| */ |
| public class JSPWikiMarkupParser extends MarkupParser { |
| |
| protected static final int READ = 0; |
| protected static final int EDIT = 1; |
| protected static final int EMPTY = 2; // Empty message |
| protected static final int LOCAL = 3; |
| protected static final int LOCALREF = 4; |
| protected static final int IMAGE = 5; |
| protected static final int EXTERNAL = 6; |
| protected static final int INTERWIKI = 7; |
| protected static final int IMAGELINK = 8; |
| protected static final int IMAGEWIKILINK = 9; |
| protected static final int ATTACHMENT = 10; |
| |
| private static final Logger log = Logger.getLogger( JSPWikiMarkupParser.class ); |
| |
| private boolean m_isbold = false; |
| private boolean m_isitalic = false; |
| private boolean m_istable = false; |
| private boolean m_isPre = false; |
| private boolean m_isEscaping = false; |
| private boolean m_isdefinition = false; |
| private boolean m_isPreBlock = false; |
| |
| /** Contains style information, in multiple forms. */ |
| private Stack<Boolean> m_styleStack = new Stack<>(); |
| |
| // general list handling |
| private int m_genlistlevel = 0; |
| private StringBuilder m_genlistBulletBuffer = new StringBuilder(10); // stores the # and * pattern |
| private boolean m_allowPHPWikiStyleLists = true; |
| |
| private boolean m_isOpenParagraph = false; |
| |
| /** Parser for extended link functionality. */ |
| private LinkParser m_linkParser = new LinkParser(); |
| |
| /** Keeps track of any plain text that gets put in the Text nodes */ |
| private StringBuilder m_plainTextBuf = new StringBuilder(20); |
| |
| private Element m_currentElement; |
| |
| /** Keep track of duplicate header names. */ |
| private Map<String, Integer> m_titleSectionCounter = new HashMap<>(); |
| |
| /** If true, consider CamelCase hyperlinks as well. */ |
| public static final String PROP_CAMELCASELINKS = "jspwiki.translatorReader.camelCaseLinks"; |
| |
| /** If true, all hyperlinks are translated as well, regardless whether they |
| are surrounded by brackets. */ |
| public static final String PROP_PLAINURIS = "jspwiki.translatorReader.plainUris"; |
| |
| /** If true, all outward attachment info links have a small link image appended. */ |
| public static final String PROP_USEATTACHMENTIMAGE = "jspwiki.translatorReader.useAttachmentImage"; |
| |
| /** If true, then considers CamelCase links as well. */ |
| private boolean m_camelCaseLinks = false; |
| |
| /** If true, then generate special output for wysiwyg editing in certain cases */ |
| private boolean m_wysiwygEditorMode = false; |
| |
| /** If true, consider URIs that have no brackets as well. */ |
| // FIXME: Currently reserved, but not used. |
| private boolean m_plainUris = false; |
| |
| /** If true, all outward links use a small link image. */ |
| private boolean m_useOutlinkImage = true; |
| |
| private boolean m_useAttachmentImage = true; |
| |
| /** If true, allows raw HTML. */ |
| private boolean m_allowHTML = false; |
| |
| private boolean m_useRelNofollow = false; |
| |
| private PatternCompiler m_compiler = new Perl5Compiler(); |
| |
| static final String WIKIWORD_REGEX = "(^|[[:^alnum:]]+)([[:upper:]]+[[:lower:]]+[[:upper:]]+[[:alnum:]]*|(http://|https://|mailto:)([A-Za-z0-9_/\\.\\+\\?\\#\\-\\@=&;~%]+))"; |
| |
| private PatternMatcher m_camelCaseMatcher = new Perl5Matcher(); |
| private Pattern m_camelCasePattern; |
| |
| private int m_rowNum = 1; |
| |
| private Heading m_lastHeading = null; |
| |
| private static final String CAMELCASE_PATTERN = "JSPWikiMarkupParser.camelCasePattern"; |
| |
/**
 *  Creates a markup parser and configures it from the given context.
 *
 *  @param context The WikiContext which controls the parsing
 *  @param in Where the data is read from.
 */
public JSPWikiMarkupParser( final WikiContext context, final Reader in ) {
    super( context, in );
    initialize();
}
| |
| // FIXME: parsers should be pooled for better performance. |
| private void initialize() { |
| initInlineImagePatterns(); |
| |
| m_camelCasePattern = m_engine.getAttribute( CAMELCASE_PATTERN ); |
| if( m_camelCasePattern == null ) { |
| try { |
| m_camelCasePattern = m_compiler.compile( WIKIWORD_REGEX,Perl5Compiler.DEFAULT_MASK|Perl5Compiler.READ_ONLY_MASK ); |
| } catch( final MalformedPatternException e ) { |
| log.fatal("Internal error: Someone put in a faulty pattern.",e); |
| throw new InternalWikiException("Faulty camelcasepattern in TranslatorReader", e); |
| } |
| m_engine.setAttribute( CAMELCASE_PATTERN, m_camelCasePattern ); |
| } |
| |
| // Set the properties. |
| final Properties props = m_engine.getWikiProperties(); |
| final String cclinks = m_context.getPage().getAttribute( PROP_CAMELCASELINKS ); |
| |
| if( cclinks != null ) { |
| m_camelCaseLinks = TextUtil.isPositive( cclinks ); |
| } else { |
| m_camelCaseLinks = TextUtil.getBooleanProperty( props, PROP_CAMELCASELINKS, m_camelCaseLinks ); |
| } |
| |
| final Boolean wysiwygVariable = (Boolean)m_context.getVariable( WikiContext.VAR_WYSIWYG_EDITOR_MODE ); |
| if( wysiwygVariable != null ) { |
| m_wysiwygEditorMode = wysiwygVariable; |
| } |
| |
| m_plainUris = m_context.getBooleanWikiProperty( PROP_PLAINURIS, m_plainUris ); |
| m_useOutlinkImage = m_context.getBooleanWikiProperty( PROP_USEOUTLINKIMAGE, m_useOutlinkImage ); |
| m_useAttachmentImage = m_context.getBooleanWikiProperty( PROP_USEATTACHMENTIMAGE, m_useAttachmentImage ); |
| m_allowHTML = m_context.getBooleanWikiProperty( PROP_ALLOWHTML, m_allowHTML ); |
| m_useRelNofollow = m_context.getBooleanWikiProperty( PROP_USERELNOFOLLOW, m_useRelNofollow ); |
| |
| if( m_engine.getManager( UserManager.class ).getUserDatabase() == null || m_engine.getManager( AuthorizationManager.class ) == null ) { |
| disableAccessRules(); |
| } |
| |
| m_context.getPage().setHasMetadata(); |
| } |
| |
| /** |
| * Calls a transmutator chain. |
| * |
| * @param list Chain to call |
| * @param text Text that should be passed to the mutate() method of each of the mutators in the chain. |
| * @return The result of the mutation. |
| */ |
| protected String callMutatorChain( final Collection< StringTransmutator > list, String text ) { |
| if( list == null || list.size() == 0 ) { |
| return text; |
| } |
| |
| for( final StringTransmutator m : list ) { |
| text = m.mutate( m_context, text ); |
| } |
| |
| return text; |
| } |
| |
| /** |
| * Calls the heading listeners. |
| * |
| * @param param A Heading object. |
| */ |
| protected void callHeadingListenerChain( final Heading param ) |
| { |
| final List< HeadingListener > list = m_headingListenerChain; |
| |
| for( final Iterator< HeadingListener > i = list.iterator(); i.hasNext(); ) |
| { |
| final HeadingListener h = i.next(); |
| |
| h.headingAdded( m_context, param ); |
| } |
| } |
| |
| /** |
| * Creates a JDOM anchor element. Can be overridden to change the URL creation, |
| * if you really know what you are doing. |
| * |
| * @param type One of the types above |
| * @param link URL to which to link to |
| * @param text Link text |
| * @param section If a particular section identifier is required. |
| * @return An A element. |
| * @since 2.4.78 |
| */ |
| protected Element createAnchor( final int type, final String link, String text, String section) |
| { |
| text = escapeHTMLEntities( text ); |
| section = escapeHTMLEntities( section ); |
| final Element el = new Element("a"); |
| el.setAttribute("class",CLASS_TYPES[type]); |
| el.setAttribute("href",link+section); |
| el.addContent(text); |
| return el; |
| } |
| |
| private Element makeLink( int type, final String link, String text, String section, final Iterator< Attribute > attributes ) |
| { |
| Element el = null; |
| |
| if( text == null ) text = link; |
| |
| text = callMutatorChain( m_linkMutators, text ); |
| |
| section = (section != null) ? ("#"+section) : ""; |
| |
| // Make sure we make a link name that can be accepted |
| // as a valid URL. |
| |
| if( link.length() == 0 ) |
| { |
| type = EMPTY; |
| } |
| final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); |
| |
| switch(type) |
| { |
| case READ: |
| el = createAnchor( READ, m_context.getURL(WikiContext.VIEW, link), text, section ); |
| break; |
| |
| case EDIT: |
| el = createAnchor( EDIT, m_context.getURL(WikiContext.EDIT,link), text, "" ); |
| el.setAttribute("title", MessageFormat.format( rb.getString( "markupparser.link.create" ), link ) ); |
| |
| break; |
| |
| case EMPTY: |
| el = new Element("u").addContent(text); |
| break; |
| |
| // |
| // These two are for local references - footnotes and |
| // references to footnotes. |
| // We embed the page name (or whatever WikiContext gives us) |
| // to make sure the links are unique across Wiki. |
| // |
| case LOCALREF: |
| el = createAnchor( LOCALREF, "#ref-"+m_context.getName()+"-"+link, "["+text+"]", "" ); |
| break; |
| |
| case LOCAL: |
| el = new Element("a").setAttribute("class",CLASS_FOOTNOTE); |
| el.setAttribute("name", "ref-"+m_context.getName()+"-"+link.substring(1)); |
| el.addContent("["+text+"]"); |
| break; |
| |
| // |
| // With the image, external and interwiki types we need to |
| // make sure nobody can put in Javascript or something else |
| // annoying into the links themselves. We do this by preventing |
| // a haxor from stopping the link name short with quotes in |
| // fillBuffer(). |
| // |
| case IMAGE: |
| el = new Element("img").setAttribute("class","inline"); |
| el.setAttribute("src",link); |
| el.setAttribute("alt",text); |
| break; |
| |
| case IMAGELINK: |
| el = new Element("img").setAttribute("class","inline"); |
| el.setAttribute("src",link); |
| el.setAttribute("alt",text); |
| el = createAnchor(IMAGELINK,text,"","").addContent(el); |
| break; |
| |
| case IMAGEWIKILINK: |
| final String pagelink = m_context.getURL(WikiContext.VIEW,text); |
| el = new Element("img").setAttribute("class","inline"); |
| el.setAttribute("src",link); |
| el.setAttribute("alt",text); |
| el = createAnchor(IMAGEWIKILINK,pagelink,"","").addContent(el); |
| break; |
| |
| case EXTERNAL: |
| el = createAnchor( EXTERNAL, link, text, section ); |
| if( m_useRelNofollow ) el.setAttribute("rel","nofollow"); |
| break; |
| |
| case INTERWIKI: |
| el = createAnchor( INTERWIKI, link, text, section ); |
| break; |
| |
| case ATTACHMENT: |
| final String attlink = m_context.getURL( WikiContext.ATTACH, |
| link ); |
| |
| final String infolink = m_context.getURL( WikiContext.INFO, |
| link ); |
| |
| final String imglink = m_context.getURL( WikiContext.NONE, |
| "images/attachment_small.png" ); |
| |
| el = createAnchor( ATTACHMENT, attlink, text, "" ); |
| |
| if( m_engine.getManager( AttachmentManager.class ).forceDownload( attlink ) ) |
| { |
| el.setAttribute("download", ""); |
| } |
| |
| pushElement(el); |
| popElement(el.getName()); |
| |
| if( m_useAttachmentImage ) |
| { |
| el = new Element("img").setAttribute("src",imglink); |
| el.setAttribute("border","0"); |
| el.setAttribute("alt","(info)"); |
| |
| el = new Element("a").setAttribute("href",infolink).addContent(el); |
| el.setAttribute("class","infolink"); |
| } |
| else |
| { |
| el = null; |
| } |
| break; |
| |
| default: |
| break; |
| } |
| |
| if( el != null && attributes != null ) |
| { |
| while( attributes.hasNext() ) |
| { |
| final Attribute attr = attributes.next(); |
| if( attr != null ) |
| { |
| el.setAttribute(attr); |
| } |
| } |
| } |
| |
| if( el != null ) |
| { |
| flushPlainText(); |
| m_currentElement.addContent( el ); |
| } |
| return el; |
| } |
| |
| /** |
| * These are all of the HTML 4.01 block-level elements. |
| */ |
| private static final String[] BLOCK_ELEMENTS = { |
| "address", "blockquote", "div", "dl", "fieldset", "form", |
| "h1", "h2", "h3", "h4", "h5", "h6", |
| "hr", "noscript", "ol", "p", "pre", "table", "ul" |
| }; |
| |
| private static boolean isBlockLevel( final String name ) |
| { |
| return Arrays.binarySearch( BLOCK_ELEMENTS, name ) >= 0; |
| } |
| |
| /** |
| * This method peeks ahead in the stream until EOL and returns the result. |
| * It will keep the buffers untouched. |
| * |
| * @return The string from the current position to the end of line. |
| */ |
| |
| // FIXME: Always returns an empty line, even if the stream is full. |
| private String peekAheadLine() |
| throws IOException |
| { |
| final String s = readUntilEOL().toString(); |
| |
| if( s.length() > PUSHBACK_BUFFER_SIZE ) |
| { |
| log.warn("Line is longer than maximum allowed size ("+PUSHBACK_BUFFER_SIZE+" characters. Attempting to recover..."); |
| pushBack( s.substring(0,PUSHBACK_BUFFER_SIZE-1) ); |
| } |
| else |
| { |
| try |
| { |
| pushBack( s ); |
| } |
| catch( final IOException e ) |
| { |
| log.warn("Pushback failed: the line is probably too long. Attempting to recover."); |
| } |
| } |
| return s; |
| } |
| |
| private int flushPlainText() |
| { |
| final int numChars = m_plainTextBuf.length(); |
| |
| if( numChars > 0 ) |
| { |
| String buf; |
| |
| if( !m_allowHTML ) |
| { |
| buf = escapeHTMLEntities(m_plainTextBuf.toString()); |
| } |
| else |
| { |
| buf = m_plainTextBuf.toString(); |
| } |
| // |
| // We must first empty the buffer because the side effect of |
| // calling makeCamelCaseLink() is to call this routine. |
| // |
| |
| m_plainTextBuf = new StringBuilder(20); |
| |
| try |
| { |
| // |
| // This is the heaviest part of parsing, and therefore we can |
| // do some optimization here. |
| // |
| // 1) Only when the length of the buffer is big enough, we try to do the match |
| // |
| |
| if( m_camelCaseLinks && !m_isEscaping && buf.length() > 3 ) |
| { |
| // System.out.println("Buffer="+buf); |
| |
| while( m_camelCaseMatcher.contains( buf, m_camelCasePattern ) ) |
| { |
| final MatchResult result = m_camelCaseMatcher.getMatch(); |
| |
| final String firstPart = buf.substring(0,result.beginOffset(0)); |
| String prefix = result.group(1); |
| |
| if( prefix == null ) prefix = ""; |
| |
| final String camelCase = result.group(2); |
| final String protocol = result.group(3); |
| String uri = protocol+result.group(4); |
| buf = buf.substring(result.endOffset(0)); |
| |
| m_currentElement.addContent( firstPart ); |
| |
| // |
| // Check if the user does not wish to do URL or WikiWord expansion |
| // |
| if( prefix.endsWith("~") || prefix.indexOf('[') != -1 ) |
| { |
| if( prefix.endsWith("~") ) |
| { |
| if( m_wysiwygEditorMode ) |
| { |
| m_currentElement.addContent( "~" ); |
| } |
| prefix = prefix.substring(0,prefix.length()-1); |
| } |
| if( camelCase != null ) |
| { |
| m_currentElement.addContent( prefix+camelCase ); |
| } |
| else if( protocol != null ) |
| { |
| m_currentElement.addContent( prefix+uri ); |
| } |
| continue; |
| } |
| |
| // |
| // Fine, then let's check what kind of a link this was |
| // and emit the proper elements |
| // |
| if( protocol != null ) |
| { |
| final char c = uri.charAt(uri.length()-1); |
| if( c == '.' || c == ',' ) |
| { |
| uri = uri.substring(0,uri.length()-1); |
| buf = c + buf; |
| } |
| // System.out.println("URI match "+uri); |
| m_currentElement.addContent( prefix ); |
| makeDirectURILink( uri ); |
| } |
| else |
| { |
| // System.out.println("Matched: '"+camelCase+"'"); |
| // System.out.println("Split to '"+firstPart+"', and '"+buf+"'"); |
| // System.out.println("prefix="+prefix); |
| m_currentElement.addContent( prefix ); |
| |
| makeCamelCaseLink( camelCase ); |
| } |
| } |
| |
| m_currentElement.addContent( buf ); |
| } |
| else |
| { |
| // |
| // No camelcase asked for, just add the elements |
| // |
| m_currentElement.addContent( buf ); |
| } |
| } |
| catch( final IllegalDataException e ) |
| { |
| // |
| // Sometimes it's possible that illegal XML chars is added to the data. |
| // Here we make sure it does not stop parsing. |
| // |
| m_currentElement.addContent( makeError(cleanupSuspectData( e.getMessage() )) ); |
| } |
| } |
| |
| return numChars; |
| } |
| |
| /** |
| * Escapes XML entities in a HTML-compatible way (i.e. does not escape |
| * entities that are already escaped). |
| * |
| * @param buf |
| * @return An escaped string. |
| */ |
| private String escapeHTMLEntities( final String buf) |
| { |
| final StringBuilder tmpBuf = new StringBuilder( buf.length() + 20 ); |
| |
| for( int i = 0; i < buf.length(); i++ ) |
| { |
| final char ch = buf.charAt(i); |
| |
| if( ch == '<' ) |
| { |
| tmpBuf.append("<"); |
| } |
| else if( ch == '>' ) |
| { |
| tmpBuf.append(">"); |
| } |
| else if( ch == '\"' ) |
| { |
| tmpBuf.append("""); |
| } |
| else if( ch == '&' ) |
| { |
| // |
| // If the following is an XML entity reference (&#.*;) we'll |
| // leave it as it is; otherwise we'll replace it with an & |
| // |
| |
| boolean isEntity = false; |
| final StringBuilder entityBuf = new StringBuilder(); |
| |
| if( i < buf.length() -1 ) |
| { |
| for( int j = i; j < buf.length(); j++ ) |
| { |
| final char ch2 = buf.charAt(j); |
| |
| if( Character.isLetterOrDigit( ch2 ) || (ch2 == '#' && j == i+1) || ch2 == ';' || ch2 == '&' ) |
| { |
| entityBuf.append(ch2); |
| |
| if( ch2 == ';' ) |
| { |
| isEntity = true; |
| break; |
| } |
| } |
| else |
| { |
| break; |
| } |
| } |
| } |
| |
| if( isEntity ) |
| { |
| tmpBuf.append( entityBuf ); |
| i = i + entityBuf.length() - 1; |
| } |
| else |
| { |
| tmpBuf.append("&"); |
| } |
| |
| } |
| else |
| { |
| tmpBuf.append( ch ); |
| } |
| } |
| |
| return tmpBuf.toString(); |
| } |
| |
| private Element pushElement( final Element e ) |
| { |
| flushPlainText(); |
| m_currentElement.addContent( e ); |
| m_currentElement = e; |
| |
| return e; |
| } |
| |
| private Element addElement( final Content e ) |
| { |
| if( e != null ) |
| { |
| flushPlainText(); |
| m_currentElement.addContent( e ); |
| } |
| return m_currentElement; |
| } |
| |
| /** |
| * All elements that can be empty by the HTML DTD. |
| */ |
| // Keep sorted. |
| private static final String[] EMPTY_ELEMENTS = { |
| "area", "base", "br", "col", "hr", "img", "input", "link", "meta", "p", "param" |
| }; |
| |
| /** |
| * Goes through the current element stack and pops all elements until this |
| * element is found - this essentially "closes" and element. |
| * |
| * @param s |
| * @return The new current element, or null, if there was no such element in the entire stack. |
| */ |
| private Element popElement( final String s ) |
| { |
| final int flushedBytes = flushPlainText(); |
| |
| Element currEl = m_currentElement; |
| |
| while( currEl.getParentElement() != null ) |
| { |
| if( currEl.getName().equals(s) && !currEl.isRootElement() ) |
| { |
| m_currentElement = currEl.getParentElement(); |
| |
| // |
| // Check if it's okay for this element to be empty. Then we will |
| // trick the JDOM generator into not generating an empty element, |
| // by putting an empty string between the tags. Yes, it's a kludge |
| // but what'cha gonna do about it. :-) |
| // |
| |
| if( flushedBytes == 0 && Arrays.binarySearch( EMPTY_ELEMENTS, s ) < 0 ) |
| { |
| currEl.addContent(""); |
| } |
| |
| return m_currentElement; |
| } |
| |
| currEl = currEl.getParentElement(); |
| } |
| |
| return null; |
| } |
| |
| |
| /** |
| * Reads the stream until it meets one of the specified |
| * ending characters, or stream end. The ending character will be left |
| * in the stream. |
| */ |
| private String readUntil( final String endChars ) |
| throws IOException |
| { |
| final StringBuilder sb = new StringBuilder( 80 ); |
| int ch = nextToken(); |
| |
| while( ch != -1 ) |
| { |
| if( ch == '\\' ) |
| { |
| ch = nextToken(); |
| if( ch == -1 ) |
| { |
| break; |
| } |
| } |
| else |
| { |
| if( endChars.indexOf((char)ch) != -1 ) |
| { |
| pushBack( ch ); |
| break; |
| } |
| } |
| sb.append( (char) ch ); |
| ch = nextToken(); |
| } |
| |
| return sb.toString(); |
| } |
| |
| /** |
| * Reads the stream while the characters that have been specified are |
| * in the stream, returning then the result as a String. |
| */ |
| private String readWhile( final String endChars ) |
| throws IOException |
| { |
| final StringBuilder sb = new StringBuilder( 80 ); |
| int ch = nextToken(); |
| |
| while( ch != -1 ) |
| { |
| if( endChars.indexOf((char)ch) == -1 ) |
| { |
| pushBack( ch ); |
| break; |
| } |
| |
| sb.append( (char) ch ); |
| ch = nextToken(); |
| } |
| |
| return sb.toString(); |
| } |
| |
| private JSPWikiMarkupParser m_cleanTranslator; |
| |
| /** |
| * Does a lazy init. Otherwise, we would get into a situation where HTMLRenderer would try and boot a TranslatorReader before |
| * the TranslatorReader it is contained by is up. |
| */ |
| private JSPWikiMarkupParser getCleanTranslator() { |
| if( m_cleanTranslator == null ) { |
| final WikiContext dummyContext = new WikiContext( m_engine, m_context.getHttpRequest(), m_context.getPage() ); |
| m_cleanTranslator = new JSPWikiMarkupParser( dummyContext, null ); |
| m_cleanTranslator.m_allowHTML = true; |
| } |
| |
| return m_cleanTranslator; |
| } |
| /** |
| * Modifies the "hd" parameter to contain proper values. Because |
| * an "id" tag may only contain [a-zA-Z0-9:_-], we'll replace the |
| * % after url encoding with '_'. |
| * <p> |
| * Counts also duplicate headings (= headings with similar name), and |
| * attaches a counter. |
| */ |
| private String makeHeadingAnchor( final String baseName, String title, final Heading hd ) { |
| hd.m_titleText = title; |
| title = MarkupParser.wikifyLink( title ); |
| hd.m_titleSection = m_engine.encodeName(title); |
| |
| if( m_titleSectionCounter.containsKey( hd.m_titleSection ) ) { |
| final Integer count = m_titleSectionCounter.get( hd.m_titleSection ) + 1; |
| m_titleSectionCounter.put( hd.m_titleSection, count ); |
| hd.m_titleSection += "-" + count; |
| } else { |
| m_titleSectionCounter.put( hd.m_titleSection, 1 ); |
| } |
| |
| hd.m_titleAnchor = "section-" + m_engine.encodeName( baseName ) + "-" + hd.m_titleSection; |
| hd.m_titleAnchor = hd.m_titleAnchor.replace( '%', '_' ); |
| hd.m_titleAnchor = hd.m_titleAnchor.replace( '/', '_' ); |
| |
| return hd.m_titleAnchor; |
| } |
| |
| private String makeSectionTitle( String title ) { |
| title = title.trim(); |
| try { |
| final JSPWikiMarkupParser dtr = getCleanTranslator(); |
| dtr.setInputReader( new StringReader( title ) ); |
| final WikiDocument doc = dtr.parse(); |
| doc.setContext( m_context ); |
| |
| return XmlUtil.extractTextFromDocument( doc ); |
| } catch( final IOException e ) { |
| log.fatal("Title parsing not working", e ); |
| throw new InternalWikiException( "Xml text extraction not working as expected when cleaning title" + e.getMessage() , e ); |
| } |
| } |
| |
| /** |
| * Returns XHTML for the heading. |
| * |
| * @param level The level of the heading. @see Heading |
| * @param title the title for the heading |
| * @param hd a List to which heading should be added |
| * @return An Element containing the heading |
| */ |
| public Element makeHeading( final int level, final String title, final Heading hd ) { |
| final Element el; |
| final String pageName = m_context.getPage().getName(); |
| final String outTitle = makeSectionTitle( title ); |
| hd.m_level = level; |
| |
| switch( level ) { |
| case Heading.HEADING_SMALL: |
| el = new Element( "h4" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) ); |
| break; |
| |
| case Heading.HEADING_MEDIUM: |
| el = new Element( "h3" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) ); |
| break; |
| |
| case Heading.HEADING_LARGE: |
| el = new Element( "h2" ).setAttribute("id",makeHeadingAnchor( pageName, outTitle, hd ) ); |
| break; |
| |
| default: |
| throw new InternalWikiException( "Illegal heading type " + level ); |
| } |
| |
| return el; |
| } |
| |
| /** |
| * When given a link to a WikiName, we just return |
| * a proper HTML link for it. The local link mutator |
| * chain is also called. |
| */ |
| private Element makeCamelCaseLink( final String wikiname ) |
| { |
| final String matchedLink = m_linkParsingOperations.linkIfExists( wikiname ); |
| |
| callMutatorChain( m_localLinkMutatorChain, wikiname ); |
| |
| if( matchedLink != null ) { |
| makeLink( READ, matchedLink, wikiname, null, null ); |
| } else { |
| makeLink( EDIT, wikiname, wikiname, null, null ); |
| } |
| |
| return m_currentElement; |
| } |
| |
| /** Holds the image URL for the duration of this parser */ |
| private String m_outlinkImageURL = null; |
| |
| /** |
| * Returns an element for the external link image (out.png). However, |
| * this method caches the URL for the lifetime of this MarkupParser, |
| * because it's commonly used, and we'll end up with possibly hundreds |
| * our thousands of references to it... It's a lot faster, too. |
| * |
| * @return An element containing the HTML for the outlink image. |
| */ |
| private Element outlinkImage() |
| { |
| Element el = null; |
| |
| if( m_useOutlinkImage ) |
| { |
| if( m_outlinkImageURL == null ) |
| { |
| m_outlinkImageURL = m_context.getURL( WikiContext.NONE, OUTLINK_IMAGE ); |
| } |
| |
| el = new Element( "img" ).setAttribute( "class", OUTLINK ); |
| el.setAttribute( "src", m_outlinkImageURL ); |
| el.setAttribute( "alt","" ); |
| } |
| |
| return el; |
| } |
| |
| /** |
| * Takes an URL and turns it into a regular wiki link. Unfortunately, |
| * because of the way that flushPlainText() works, it already encodes |
| * all of the XML entities. But so does WikiContext.getURL(), so we |
| * have to do a reverse-replace here, so that it can again be replaced in makeLink. |
| * <p> |
| * What a crappy problem. |
| * |
| * @param url |
| * @return An anchor Element containing the link. |
| */ |
| private Element makeDirectURILink( String url ) { |
| final Element result; |
| String last = null; |
| |
| if( url.endsWith( "," ) || url.endsWith( "." ) ) { |
| last = url.substring( url.length() - 1 ); |
| url = url.substring( 0, url.length() - 1 ); |
| } |
| |
| callMutatorChain( m_externalLinkMutatorChain, url ); |
| |
| if( m_linkParsingOperations.isImageLink( url, isImageInlining(), getInlineImagePatterns() ) ) { |
| result = handleImageLink( StringUtils.replace( url, "&", "&" ), url, false ); |
| } else { |
| result = makeLink( EXTERNAL, StringUtils.replace( url, "&", "&" ), url, null, null ); |
| addElement( outlinkImage() ); |
| } |
| |
| if( last != null ) { |
| m_plainTextBuf.append( last ); |
| } |
| |
| return result; |
| } |
| |
| /** |
| * Image links are handled differently: |
| * 1. If the text is a WikiName of an existing page, |
| * it gets linked. |
| * 2. If the text is an external link, then it is inlined. |
| * 3. Otherwise it becomes an ALT text. |
| * |
| * @param reallink The link to the image. |
| * @param link Link text portion, may be a link to somewhere else. |
| * @param hasLinkText If true, then the defined link had a link text available. |
| * This means that the link text may be a link to a wiki page, |
| * or an external resource. |
| */ |
| |
| // FIXME: isExternalLink() is called twice. |
| private Element handleImageLink( final String reallink, final String link, final boolean hasLinkText ) |
| { |
| final String possiblePage = MarkupParser.cleanLink( link ); |
| |
| if( m_linkParsingOperations.isExternalLink( link ) && hasLinkText ) |
| { |
| return makeLink( IMAGELINK, reallink, link, null, null ); |
| } |
| else if( m_linkParsingOperations.linkExists( possiblePage ) && hasLinkText ) |
| { |
| // System.out.println("Orig="+link+", Matched: "+matchedLink); |
| callMutatorChain( m_localLinkMutatorChain, possiblePage ); |
| |
| return makeLink( IMAGEWIKILINK, reallink, link, null, null ); |
| } |
| else |
| { |
| return makeLink( IMAGE, reallink, link, null, null ); |
| } |
| } |
| |
| private Element handleAccessRule( String ruleLine ) { |
| if( m_wysiwygEditorMode ) { |
| m_currentElement.addContent( "[" + ruleLine + "]" ); |
| } |
| |
| if( !m_parseAccessRules ) { |
| return m_currentElement; |
| } |
| final WikiPage page = m_context.getRealPage(); |
| // UserDatabase db = m_context.getEngine().getUserDatabase(); |
| |
| if( ruleLine.startsWith( "{" ) ) { |
| ruleLine = ruleLine.substring( 1 ); |
| } |
| |
| if( ruleLine.endsWith( "}" ) ) { |
| ruleLine = ruleLine.substring( 0, ruleLine.length() - 1 ); |
| } |
| |
| if( log.isDebugEnabled() ) { |
| log.debug("page="+page.getName()+", ACL = "+ruleLine); |
| } |
| |
| try { |
| final Acl acl = m_engine.getManager( AclManager.class ).parseAcl( page, ruleLine ); |
| page.setAcl( acl ); |
| |
| if( log.isDebugEnabled() ) { |
| log.debug( acl.toString() ); |
| } |
| } catch( final WikiSecurityException wse ) { |
| return makeError( wse.getMessage() ); |
| } |
| |
| return m_currentElement; |
| } |
| |
| /** |
| * Handles metadata setting [{SET foo=bar}] |
| */ |
| private Element handleMetadata( final String link ) { |
| if( m_wysiwygEditorMode ) { |
| m_currentElement.addContent( "[" + link + "]" ); |
| } |
| |
| try { |
| final String args = link.substring( link.indexOf(' '), link.length()-1 ); |
| final String name = args.substring( 0, args.indexOf('=') ).trim(); |
| String val = args.substring( args.indexOf('=')+1 ).trim(); |
| |
| if( val.startsWith("'") ) { |
| val = val.substring( 1 ); |
| } |
| if( val.endsWith("'") ) { |
| val = val.substring( 0, val.length()-1 ); |
| } |
| |
| // log.debug("SET name='"+name+"', value='"+val+"'."); |
| |
| if( name.length() > 0 && val.length() > 0 ) { |
| val = m_engine.getManager( VariableManager.class ).expandVariables( m_context, val ); |
| m_context.getPage().setAttribute( name, val ); |
| } |
| } catch( final Exception e ) { |
| final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); |
| return makeError( MessageFormat.format( rb.getString( "markupparser.error.invalidset" ), link ) ); |
| } |
| |
| return m_currentElement; |
| } |
| |
| /** |
| * Emits a processing instruction that will disable markup escaping. This is |
| * very useful if you want to emit HTML directly into the stream. |
| * |
| */ |
| private void disableOutputEscaping() { |
| addElement( new ProcessingInstruction( Result.PI_DISABLE_OUTPUT_ESCAPING, "" ) ); |
| } |
| |
| /** |
| * Gobbles up all hyperlinks that are encased in square brackets. |
| */ |
| private Element handleHyperlinks( String linktext, final int pos ) { |
| final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); |
| final StringBuilder sb = new StringBuilder( linktext.length() + 80 ); |
| |
| if( m_linkParsingOperations.isAccessRule( linktext ) ) { |
| return handleAccessRule( linktext ); |
| } |
| |
| if( m_linkParsingOperations.isMetadata( linktext ) ) { |
| return handleMetadata( linktext ); |
| } |
| |
| if( m_linkParsingOperations.isPluginLink( linktext ) ) { |
| try { |
| final PluginContent pluginContent = PluginContent.parsePluginLine( m_context, linktext, pos ); |
| |
| // This might sometimes fail, especially if there is something which looks like a plugin invocation but is really not. |
| if( pluginContent != null ) { |
| addElement( pluginContent ); |
| pluginContent.executeParse( m_context ); |
| } |
| } catch( final PluginException e ) { |
| log.info( m_context.getRealPage().getWiki() + " : " + m_context.getRealPage().getName() + " - Failed to insert plugin: " + e.getMessage() ); |
| //log.info( "Root cause:",e.getRootThrowable() ); |
| if( !m_wysiwygEditorMode ) { |
| final ResourceBundle rbPlugin = Preferences.getBundle( m_context, WikiPlugin.CORE_PLUGINS_RESOURCEBUNDLE ); |
| return addElement( makeError( MessageFormat.format( rbPlugin.getString( "plugin.error.insertionfailed" ), |
| m_context.getRealPage().getWiki(), |
| m_context.getRealPage().getName(), |
| e.getMessage() ) ) ); |
| } |
| } |
| |
| return m_currentElement; |
| } |
| |
| try { |
| final LinkParser.Link link = m_linkParser.parse( linktext ); |
| linktext = link.getText(); |
| String linkref = link.getReference(); |
| |
| // |
| // Yes, we now have the components separated. |
| // linktext = the text the link should have |
| // linkref = the url or page name. |
| // |
| // In many cases these are the same. [linktext|linkref]. |
| // |
| if( m_linkParsingOperations.isVariableLink( linktext ) ) { |
| final Content el = new VariableContent( linktext ); |
| |
| addElement( el ); |
| } else if( m_linkParsingOperations.isExternalLink( linkref ) ) { |
| // It's an external link, out of this Wiki |
| |
| callMutatorChain( m_externalLinkMutatorChain, linkref ); |
| |
| if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) { |
| handleImageLink( linkref, linktext, link.hasReference() ); |
| } else { |
| makeLink( EXTERNAL, linkref, linktext, null, link.getAttributes() ); |
| addElement( outlinkImage() ); |
| } |
| } else if( link.isInterwikiLink() ) { |
| // It's an interwiki link; InterWiki links also get added to external link chain after the links have been resolved. |
| |
| // FIXME: There is an interesting issue here: We probably should |
| // URLEncode the wikiPage, but we can't since some of the |
| // Wikis use slashes (/), which won't survive URLEncoding. |
| // Besides, we don't know which character set the other Wiki |
| // is using, so you'll have to write the entire name as it appears |
| // in the URL. Bugger. |
| |
| final String extWiki = link.getExternalWiki(); |
| final String wikiPage = link.getExternalWikiPage(); |
| |
| if( m_wysiwygEditorMode ) { |
| makeLink( INTERWIKI, extWiki + ":" + wikiPage, linktext, null, link.getAttributes() ); |
| } else { |
| String urlReference = m_engine.getInterWikiURL( extWiki ); |
| |
| if( urlReference != null ) { |
| urlReference = TextUtil.replaceString( urlReference, "%s", wikiPage ); |
| urlReference = callMutatorChain( m_externalLinkMutatorChain, urlReference ); |
| |
| if( m_linkParsingOperations.isImageLink( urlReference, isImageInlining(), getInlineImagePatterns() ) ) { |
| handleImageLink( urlReference, linktext, link.hasReference() ); |
| } else { |
| makeLink( INTERWIKI, urlReference, linktext, null, link.getAttributes() ); |
| } |
| |
| if( m_linkParsingOperations.isExternalLink( urlReference ) ) { |
| addElement( outlinkImage() ); |
| } |
| } else { |
| final Object[] args = { escapeHTMLEntities( extWiki ) }; |
| |
| addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.nointerwikiref" ), args ) ) ); |
| } |
| } |
| } else if( linkref.startsWith( "#" ) ) { |
| // It defines a local footnote |
| makeLink( LOCAL, linkref, linktext, null, link.getAttributes() ); |
| } else if( TextUtil.isNumber( linkref ) ) { |
| // It defines a reference to a local footnote |
| makeLink( LOCALREF, linkref, linktext, null, link.getAttributes() ); |
| } else { |
| final int hashMark; |
| |
| // Internal wiki link, but is it an attachment link? |
| String attachment = m_engine.getManager( AttachmentManager.class ).getAttachmentInfoName( m_context, linkref ); |
| if( attachment != null ) { |
| callMutatorChain( m_attachmentLinkMutatorChain, attachment ); |
| |
| if( m_linkParsingOperations.isImageLink( linkref, isImageInlining(), getInlineImagePatterns() ) ) { |
| attachment = m_context.getURL( WikiContext.ATTACH, attachment ); |
| sb.append( handleImageLink( attachment, linktext, link.hasReference() ) ); |
| } else { |
| makeLink( ATTACHMENT, attachment, linktext, null, link.getAttributes() ); |
| } |
| } else if( ( hashMark = linkref.indexOf( '#' ) ) != -1 ) { |
| // It's an internal Wiki link, but to a named section |
| |
| final String namedSection = linkref.substring( hashMark + 1 ); |
| linkref = linkref.substring( 0, hashMark ); |
| |
| linkref = MarkupParser.cleanLink( linkref ); |
| |
| callMutatorChain( m_localLinkMutatorChain, linkref ); |
| |
| final String matchedLink = m_linkParsingOperations.linkIfExists( linkref ); |
| if( matchedLink != null ) { |
| String sectref = "section-" + m_engine.encodeName( matchedLink + "-" + wikifyLink( namedSection ) ); |
| sectref = sectref.replace( '%', '_' ); |
| makeLink( READ, matchedLink, linktext, sectref, link.getAttributes() ); |
| } else { |
| makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); |
| } |
| } else { |
| // It's an internal Wiki link |
| linkref = MarkupParser.cleanLink( linkref ); |
| |
| callMutatorChain( m_localLinkMutatorChain, linkref ); |
| |
| final String matchedLink = m_linkParsingOperations.linkIfExists( linkref ); |
| if( matchedLink != null ) { |
| makeLink( READ, matchedLink, linktext, null, link.getAttributes() ); |
| } else { |
| makeLink( EDIT, linkref, linktext, null, link.getAttributes() ); |
| } |
| } |
| } |
| |
| } catch( final ParseException e ) { |
| log.info( "Parser failure: ", e ); |
| final Object[] args = { e.getMessage() }; |
| addElement( makeError( MessageFormat.format( rb.getString( "markupparser.error.parserfailure" ), args ) ) ); |
| } |
| return m_currentElement; |
| } |
| |
| /** |
| * Pushes back any string that has been read. It will obviously |
| * be pushed back in a reverse order. |
| * |
| * @since 2.1.77 |
| */ |
| private void pushBack( final String s ) |
| throws IOException |
| { |
| for( int i = s.length()-1; i >= 0; i-- ) |
| { |
| pushBack( s.charAt(i) ); |
| } |
| } |
| |
| private Element handleBackslash() |
| throws IOException |
| { |
| final int ch = nextToken(); |
| |
| if( ch == '\\' ) |
| { |
| final int ch2 = nextToken(); |
| |
| if( ch2 == '\\' ) |
| { |
| pushElement( new Element("br").setAttribute("clear","all")); |
| return popElement("br"); |
| } |
| |
| pushBack( ch2 ); |
| |
| pushElement( new Element("br") ); |
| return popElement("br"); |
| } |
| |
| pushBack( ch ); |
| |
| return null; |
| } |
| |
| private Element handleUnderscore() |
| throws IOException |
| { |
| final int ch = nextToken(); |
| Element el = null; |
| |
| if( ch == '_' ) |
| { |
| if( m_isbold ) |
| { |
| el = popElement("b"); |
| } |
| else |
| { |
| el = pushElement( new Element("b") ); |
| } |
| m_isbold = !m_isbold; |
| } |
| else |
| { |
| pushBack( ch ); |
| } |
| |
| return el; |
| } |
| |
| |
| /** |
| * For example: italics. |
| */ |
| private Element handleApostrophe() |
| throws IOException |
| { |
| final int ch = nextToken(); |
| Element el = null; |
| |
| if( ch == '\'' ) |
| { |
| if( m_isitalic ) |
| { |
| el = popElement("i"); |
| } |
| else |
| { |
| el = pushElement( new Element("i") ); |
| } |
| m_isitalic = !m_isitalic; |
| } |
| else |
| { |
| pushBack( ch ); |
| } |
| |
| return el; |
| } |
| |
| private Element handleOpenbrace( final boolean isBlock ) |
| throws IOException |
| { |
| final int ch = nextToken(); |
| |
| if( ch == '{' ) |
| { |
| final int ch2 = nextToken(); |
| |
| if( ch2 == '{' ) |
| { |
| m_isPre = true; |
| m_isEscaping = true; |
| m_isPreBlock = isBlock; |
| |
| if( isBlock ) |
| { |
| startBlockLevel(); |
| return pushElement( new Element("pre") ); |
| } |
| |
| return pushElement( new Element("span").setAttribute("class","inline-code") ); |
| } |
| |
| pushBack( ch2 ); |
| |
| return pushElement( new Element("tt") ); |
| } |
| |
| pushBack( ch ); |
| |
| return null; |
| } |
| |
| /** |
| * Handles both }} and }}} |
| */ |
| private Element handleClosebrace() |
| throws IOException |
| { |
| final int ch2 = nextToken(); |
| |
| if( ch2 == '}' ) |
| { |
| final int ch3 = nextToken(); |
| |
| if( ch3 == '}' ) |
| { |
| if( m_isPre ) |
| { |
| if( m_isPreBlock ) |
| { |
| popElement( "pre" ); |
| } |
| else |
| { |
| popElement( "span" ); |
| } |
| |
| m_isPre = false; |
| m_isEscaping = false; |
| return m_currentElement; |
| } |
| |
| m_plainTextBuf.append("}}}"); |
| return m_currentElement; |
| } |
| |
| pushBack( ch3 ); |
| |
| if( !m_isEscaping ) |
| { |
| return popElement("tt"); |
| } |
| } |
| |
| pushBack( ch2 ); |
| |
| return null; |
| } |
| |
| private Element handleDash() |
| throws IOException |
| { |
| int ch = nextToken(); |
| |
| if( ch == '-' ) |
| { |
| final int ch2 = nextToken(); |
| |
| if( ch2 == '-' ) |
| { |
| final int ch3 = nextToken(); |
| |
| if( ch3 == '-' ) |
| { |
| // Empty away all the rest of the dashes. |
| // Do not forget to return the first non-match back. |
| do |
| { |
| ch = nextToken(); |
| } |
| while ( ch == '-' ); |
| |
| pushBack(ch); |
| startBlockLevel(); |
| pushElement( new Element("hr") ); |
| return popElement( "hr" ); |
| } |
| |
| pushBack( ch3 ); |
| } |
| pushBack( ch2 ); |
| } |
| |
| pushBack( ch ); |
| |
| return null; |
| } |
| |
| private Element handleHeading() |
| throws IOException |
| { |
| Element el = null; |
| |
| final int ch = nextToken(); |
| |
| final Heading hd = new Heading(); |
| |
| if( ch == '!' ) |
| { |
| final int ch2 = nextToken(); |
| |
| if( ch2 == '!' ) |
| { |
| final String title = peekAheadLine(); |
| |
| el = makeHeading( Heading.HEADING_LARGE, title, hd); |
| } |
| else |
| { |
| pushBack( ch2 ); |
| final String title = peekAheadLine(); |
| el = makeHeading( Heading.HEADING_MEDIUM, title, hd ); |
| } |
| } |
| else |
| { |
| pushBack( ch ); |
| final String title = peekAheadLine(); |
| el = makeHeading( Heading.HEADING_SMALL, title, hd ); |
| } |
| |
| callHeadingListenerChain( hd ); |
| |
| m_lastHeading = hd; |
| |
| if( el != null ) pushElement(el); |
| |
| return el; |
| } |
| |
| /** |
| * Reads the stream until the next EOL or EOF. Note that it will also read the |
| * EOL from the stream. |
| */ |
| private StringBuilder readUntilEOL() |
| throws IOException |
| { |
| int ch; |
| final StringBuilder buf = new StringBuilder( 256 ); |
| |
| while( true ) |
| { |
| ch = nextToken(); |
| |
| if( ch == -1 ) |
| break; |
| |
| buf.append( (char) ch ); |
| |
| if( ch == '\n' ) |
| break; |
| } |
| return buf; |
| } |
| |
| /** Controls whether italic is restarted after a paragraph shift */ |
| |
| private boolean m_restartitalic = false; |
| private boolean m_restartbold = false; |
| |
| private boolean m_newLine; |
| |
| /** |
| * Starts a block level element, therefore closing |
| * a potential open paragraph tag. |
| */ |
| private void startBlockLevel() |
| { |
| // These may not continue over block level limits in XHTML |
| |
| popElement("i"); |
| popElement("b"); |
| popElement("tt"); |
| |
| if( m_isOpenParagraph ) |
| { |
| m_isOpenParagraph = false; |
| popElement("p"); |
| m_plainTextBuf.append("\n"); // Just small beautification |
| } |
| |
| m_restartitalic = m_isitalic; |
| m_restartbold = m_isbold; |
| |
| m_isitalic = false; |
| m_isbold = false; |
| } |
| |
| private static String getListType( final char c ) |
| { |
| if( c == '*' ) |
| { |
| return "ul"; |
| } |
| else if( c == '#' ) |
| { |
| return "ol"; |
| } |
| throw new InternalWikiException("Parser got faulty list type: "+c); |
| } |
| /** |
| * Like original handleOrderedList() and handleUnorderedList() |
| * however handles both ordered ('#') and unordered ('*') mixed together. |
| */ |
| |
| // FIXME: Refactor this; it's a bit messy. |
| |
| private Element handleGeneralList() |
| throws IOException |
| { |
| startBlockLevel(); |
| |
| String strBullets = readWhile( "*#" ); |
| // String strBulletsRaw = strBullets; // to know what was original before phpwiki style substitution |
| final int numBullets = strBullets.length(); |
| |
| // override the beginning portion of bullet pattern to be like the previous |
| // to simulate PHPWiki style lists |
| |
| if(m_allowPHPWikiStyleLists) |
| { |
| // only substitute if different |
| if(!( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals |
| (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) ) |
| { |
| if(numBullets <= m_genlistlevel) |
| { |
| // Substitute all but the last character (keep the expressed bullet preference) |
| strBullets = (numBullets > 1 ? m_genlistBulletBuffer.substring(0, numBullets-1) : "") |
| + strBullets.substring(numBullets-1, numBullets); |
| } |
| else |
| { |
| strBullets = m_genlistBulletBuffer + strBullets.substring(m_genlistlevel, numBullets); |
| } |
| } |
| } |
| |
| // |
| // Check if this is still of the same type |
| // |
| if( strBullets.substring(0,Math.min(numBullets,m_genlistlevel)).equals |
| (m_genlistBulletBuffer.substring(0,Math.min(numBullets,m_genlistlevel)) ) ) |
| { |
| if( numBullets > m_genlistlevel ) |
| { |
| pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel++) ) ) ); |
| |
| for( ; m_genlistlevel < numBullets; m_genlistlevel++ ) |
| { |
| // bullets are growing, get from new bullet list |
| pushElement( new Element("li") ); |
| pushElement( new Element( getListType(strBullets.charAt(m_genlistlevel)) )); |
| } |
| } |
| else if( numBullets < m_genlistlevel ) |
| { |
| // Close the previous list item. |
| // buf.append( m_renderer.closeListItem() ); |
| popElement( "li" ); |
| |
| for( ; m_genlistlevel > numBullets; m_genlistlevel-- ) |
| { |
| // bullets are shrinking, get from old bullet list |
| |
| popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) ); |
| if( m_genlistlevel > 0 ) |
| { |
| popElement( "li" ); |
| } |
| |
| } |
| } |
| else |
| { |
| if( m_genlistlevel > 0 ) |
| { |
| popElement( "li" ); |
| } |
| } |
| } |
| else |
| { |
| // |
| // The pattern has changed, unwind and restart |
| // |
| int numEqualBullets; |
| final int numCheckBullets; |
| |
| // find out how much is the same |
| numEqualBullets = 0; |
| numCheckBullets = Math.min(numBullets,m_genlistlevel); |
| |
| while( numEqualBullets < numCheckBullets ) |
| { |
| // if the bullets are equal so far, keep going |
| if( strBullets.charAt(numEqualBullets) == m_genlistBulletBuffer.charAt(numEqualBullets)) |
| numEqualBullets++; |
| // otherwise giveup, we have found how many are equal |
| else |
| break; |
| } |
| |
| //unwind |
| for( ; m_genlistlevel > numEqualBullets; m_genlistlevel-- ) |
| { |
| popElement( getListType( m_genlistBulletBuffer.charAt(m_genlistlevel-1) ) ); |
| if( m_genlistlevel > numBullets ) |
| { |
| popElement("li"); |
| } |
| } |
| |
| //rewind |
| |
| pushElement( new Element(getListType( strBullets.charAt(numEqualBullets++) ) ) ); |
| for(int i = numEqualBullets; i < numBullets; i++) |
| { |
| pushElement( new Element("li") ); |
| pushElement( new Element( getListType( strBullets.charAt(i) ) ) ); |
| } |
| m_genlistlevel = numBullets; |
| } |
| |
| // |
| // Push a new list item, and eat away any extra whitespace |
| // |
| pushElement( new Element("li") ); |
| readWhile(" "); |
| |
| // work done, remember the new bullet list (in place of old one) |
| m_genlistBulletBuffer.setLength(0); |
| m_genlistBulletBuffer.append(strBullets); |
| |
| return m_currentElement; |
| } |
| |
| private Element unwindGeneralList() |
| { |
| //unwind |
| for( ; m_genlistlevel > 0; m_genlistlevel-- ) |
| { |
| popElement( "li" ); |
| popElement( getListType(m_genlistBulletBuffer.charAt(m_genlistlevel-1)) ); |
| } |
| |
| m_genlistBulletBuffer.setLength(0); |
| |
| return null; |
| } |
| |
| |
| private Element handleDefinitionList() |
| throws IOException |
| { |
| if( !m_isdefinition ) |
| { |
| m_isdefinition = true; |
| |
| startBlockLevel(); |
| |
| pushElement( new Element("dl") ); |
| return pushElement( new Element("dt") ); |
| } |
| |
| return null; |
| } |
| |
| private Element handleOpenbracket() |
| throws IOException |
| { |
| final StringBuilder sb = new StringBuilder(40); |
| final int pos = getPosition(); |
| int ch = nextToken(); |
| boolean isPlugin = false; |
| |
| if( ch == '[' ) |
| { |
| if( m_wysiwygEditorMode ) |
| { |
| sb.append( '[' ); |
| } |
| |
| sb.append( (char)ch ); |
| |
| while( (ch = nextToken()) == '[' ) |
| { |
| sb.append( (char)ch ); |
| } |
| } |
| |
| |
| if( ch == '{' ) |
| { |
| isPlugin = true; |
| } |
| |
| pushBack( ch ); |
| |
| if( sb.length() > 0 ) |
| { |
| m_plainTextBuf.append( sb ); |
| return m_currentElement; |
| } |
| |
| // |
| // Find end of hyperlink |
| // |
| |
| ch = nextToken(); |
| int nesting = 1; // Check for nested plugins |
| |
| while( ch != -1 ) |
| { |
| final int ch2 = nextToken(); pushBack(ch2); |
| |
| if( isPlugin ) |
| { |
| if( ch == '[' && ch2 == '{' ) |
| { |
| nesting++; |
| } |
| else if( nesting == 0 && ch == ']' && sb.charAt(sb.length()-1) == '}' ) |
| { |
| break; |
| } |
| else if( ch == '}' && ch2 == ']' ) |
| { |
| // NB: This will be decremented once at the end |
| nesting--; |
| } |
| } |
| else |
| { |
| if( ch == ']' ) |
| { |
| break; |
| } |
| } |
| |
| sb.append( (char) ch ); |
| |
| ch = nextToken(); |
| } |
| |
| // |
| // If the link is never finished, do some tricks to display the rest of the line |
| // unchanged. |
| // |
| if( ch == -1 ) |
| { |
| log.debug("Warning: unterminated link detected!"); |
| m_isEscaping = true; |
| m_plainTextBuf.append( sb ); |
| flushPlainText(); |
| m_isEscaping = false; |
| return m_currentElement; |
| } |
| |
| return handleHyperlinks( sb.toString(), pos ); |
| } |
| |
| /** |
| * Reads the stream until the current brace is closed or stream end. |
| */ |
| private String readBraceContent( final char opening, final char closing ) |
| throws IOException |
| { |
| final StringBuilder sb = new StringBuilder(40); |
| int braceLevel = 1; |
| int ch; |
| while(( ch = nextToken() ) != -1 ) |
| { |
| if( ch == '\\' ) |
| { |
| continue; |
| } |
| else if ( ch == opening ) |
| { |
| braceLevel++; |
| } |
| else if ( ch == closing ) |
| { |
| braceLevel--; |
| if (braceLevel==0) |
| { |
| break; |
| } |
| } |
| sb.append( (char)ch ); |
| } |
| return sb.toString(); |
| } |
| |
| |
| /** |
| * Handles constructs of type %%(style) and %%class |
| * @param newLine |
| * @return An Element containing the div or span, depending on the situation. |
| * @throws IOException |
| */ |
| private Element handleDiv( final boolean newLine ) |
| throws IOException |
| { |
| int ch = nextToken(); |
| Element el = null; |
| |
| if( ch == '%' ) |
| { |
| String style = null; |
| String clazz = null; |
| |
| ch = nextToken(); |
| |
| // |
| // Style or class? |
| // |
| if( ch == '(' ) |
| { |
| style = readBraceContent('(',')'); |
| } |
| else if( Character.isLetter( (char) ch ) ) |
| { |
| pushBack( ch ); |
| clazz = readUntil( "( \t\n\r" ); |
| //Note: ref.https://www.w3.org/TR/CSS21/syndata.html#characters |
| //CSS Classnames can contain only the characters [a-zA-Z0-9] and |
| //ISO 10646 characters U+00A0 and higher, plus the "-" and the "_". |
| //They cannot start with a digit, two hyphens, or a hyphen followed by a digit. |
| |
| //(1) replace '.' by spaces, allowing multiple classnames on a div or span |
| //(2) remove any invalid character |
| if( clazz != null){ |
| |
| clazz = clazz.replace('.', ' ') |
| .replaceAll("[^\\s-_\\w\\x200-\\x377]+",""); |
| |
| } |
| ch = nextToken(); |
| |
| //check for %%class1.class2( style information ) |
| if( ch == '(' ) |
| { |
| style = readBraceContent('(',')'); |
| } |
| // |
| // Pop out only spaces, so that the upcoming EOL check does not check the |
| // next line. |
| // |
| else if( ch == '\n' || ch == '\r' ) |
| { |
| pushBack(ch); |
| } |
| } |
| else |
| { |
| // |
| // Anything else stops. |
| // |
| |
| pushBack(ch); |
| |
| try |
| { |
| final Boolean isSpan = m_styleStack.pop(); |
| |
| if( isSpan == null ) |
| { |
| // Fail quietly |
| } |
| else if( isSpan.booleanValue() ) |
| { |
| el = popElement( "span" ); |
| } |
| else |
| { |
| el = popElement( "div" ); |
| } |
| } |
| catch( final EmptyStackException e ) |
| { |
| log.debug("Page '"+m_context.getName()+"' closes a %%-block that has not been opened."); |
| return m_currentElement; |
| } |
| |
| return el; |
| } |
| |
| // |
| // Check if there is an attempt to do something nasty |
| // |
| |
| try |
| { |
| style = StringEscapeUtils.unescapeHtml4(style); |
| if( style != null && style.indexOf("javascript:") != -1 ) |
| { |
| log.debug("Attempt to output javascript within CSS:"+style); |
| final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); |
| return addElement( makeError( rb.getString( "markupparser.error.javascriptattempt" ) ) ); |
| } |
| } |
| catch( final NumberFormatException e ) |
| { |
| // |
| // If there are unknown entities, we don't want the parser to stop. |
| // |
| final ResourceBundle rb = Preferences.getBundle( m_context, InternationalizationManager.CORE_BUNDLE ); |
| final String msg = MessageFormat.format( rb.getString( "markupparser.error.parserfailure"), e.getMessage() ); |
| return addElement( makeError( msg ) ); |
| } |
| |
| // |
| // Decide if we should open a div or a span? |
| // |
| final String eol = peekAheadLine(); |
| |
| if( eol.trim().length() > 0 ) |
| { |
| // There is stuff after the class |
| |
| el = new Element("span"); |
| |
| m_styleStack.push( Boolean.TRUE ); |
| } |
| else |
| { |
| startBlockLevel(); |
| el = new Element("div"); |
| m_styleStack.push( Boolean.FALSE ); |
| } |
| |
| if( style != null ) el.setAttribute("style", style); |
| if( clazz != null ) el.setAttribute("class", clazz); |
| el = pushElement( el ); |
| |
| return el; |
| } |
| |
| pushBack(ch); |
| |
| return el; |
| } |
| |
| private Element handleSlash( final boolean newLine ) |
| throws IOException |
| { |
| final int ch = nextToken(); |
| |
| pushBack(ch); |
| if( ch == '%' && !m_styleStack.isEmpty() ) |
| { |
| return handleDiv( newLine ); |
| } |
| |
| return null; |
| } |
| |
| private Element handleBar( final boolean newLine ) |
| throws IOException |
| { |
| Element el = null; |
| |
| if( !m_istable && !newLine ) |
| { |
| return null; |
| } |
| |
| // |
| // If the bar is in the first column, we will either start |
| // a new table or continue the old one. |
| // |
| |
| if( newLine ) |
| { |
| if( !m_istable ) |
| { |
| startBlockLevel(); |
| el = pushElement( new Element("table").setAttribute("class","wikitable").setAttribute("border","1") ); |
| m_istable = true; |
| m_rowNum = 0; |
| } |
| |
| m_rowNum++; |
| final Element tr = ( m_rowNum % 2 != 0 ) |
| ? new Element("tr").setAttribute("class", "odd") |
| : new Element("tr"); |
| el = pushElement( tr ); |
| } |
| |
| // |
| // Check out which table cell element to start; |
| // a header element (th) or a regular element (td). |
| // |
| final int ch = nextToken(); |
| |
| if( ch == '|' ) |
| { |
| if( !newLine ) |
| { |
| el = popElement("th"); |
| if( el == null ) popElement("td"); |
| } |
| el = pushElement( new Element("th") ); |
| } |
| else |
| { |
| if( !newLine ) |
| { |
| el = popElement("td"); |
| if( el == null ) popElement("th"); |
| } |
| |
| el = pushElement( new Element("td") ); |
| |
| pushBack( ch ); |
| } |
| |
| return el; |
| } |
| |
| /** |
| * Generic escape of next character or entity. |
| */ |
| private Element handleTilde() |
| throws IOException |
| { |
| final int ch = nextToken(); |
| |
| if( ch == ' ' ) |
| { |
| if( m_wysiwygEditorMode ) |
| { |
| m_plainTextBuf.append( "~ " ); |
| } |
| return m_currentElement; |
| } |
| |
| if( ch == '|' || ch == '~' || ch == '\\' || ch == '*' || ch == '#' || |
| ch == '-' || ch == '!' || ch == '\'' || ch == '_' || ch == '[' || |
| ch == '{' || ch == ']' || ch == '}' || ch == '%' ) |
| { |
| if( m_wysiwygEditorMode ) |
| { |
| m_plainTextBuf.append( '~' ); |
| } |
| |
| m_plainTextBuf.append( (char)ch ); |
| m_plainTextBuf.append(readWhile( ""+(char)ch )); |
| return m_currentElement; |
| } |
| |
| // No escape. |
| pushBack( ch ); |
| |
| return null; |
| } |
| |
| private void fillBuffer( final Element startElement ) |
| throws IOException |
| { |
| m_currentElement = startElement; |
| |
| boolean quitReading = false; |
| m_newLine = true; |
| disableOutputEscaping(); |
| |
| while(!quitReading) |
| { |
| final int ch = nextToken(); |
| |
| if( ch == -1 ) break; |
| |
| // |
| // Check if we're actually ending the preformatted mode. |
| // We still must do an entity transformation here. |
| // |
| if( m_isEscaping ) |
| { |
| if( ch == '}' ) |
| { |
| if( handleClosebrace() == null ) m_plainTextBuf.append( (char) ch ); |
| } |
| else if( ch == -1 ) |
| { |
| quitReading = true; |
| } |
| else if( ch == '\r' ) |
| { |
| // DOS line feeds we ignore. |
| } |
| else if( ch == '<' ) |
| { |
| m_plainTextBuf.append( "<" ); |
| } |
| else if( ch == '>' ) |
| { |
| m_plainTextBuf.append( ">" ); |
| } |
| else if( ch == '&' ) |
| { |
| m_plainTextBuf.append( "&" ); |
| } |
| else if( ch == '~' ) |
| { |
| String braces = readWhile("}"); |
| if( braces.length() >= 3 ) |
| { |
| m_plainTextBuf.append("}}}"); |
| |
| braces = braces.substring(3); |
| } |
| else |
| { |
| m_plainTextBuf.append( (char) ch ); |
| } |
| |
| for( int i = braces.length()-1; i >= 0; i-- ) |
| { |
| pushBack(braces.charAt(i)); |
| } |
| } |
| else |
| { |
| m_plainTextBuf.append( (char) ch ); |
| } |
| |
| continue; |
| } |
| |
| // |
| // An empty line stops a list |
| // |
| if( m_newLine && ch != '*' && ch != '#' && ch != ' ' && m_genlistlevel > 0 ) |
| { |
| m_plainTextBuf.append(unwindGeneralList()); |
| } |
| |
| if( m_newLine && ch != '|' && m_istable ) |
| { |
| popElement("table"); |
| m_istable = false; |
| } |
| |
| int skip = IGNORE; |
| |
| // |
| // Do the actual parsing and catch any errors. |
| // |
| try |
| { |
| skip = parseToken( ch ); |
| } |
| catch( final IllegalDataException e ) |
| { |
| log.info("Page "+m_context.getPage().getName()+" contains data which cannot be added to DOM tree: "+e.getMessage()); |
| |
| makeError("Error: "+cleanupSuspectData(e.getMessage()) ); |
| } |
| |
| // |
| // The idea is as follows: If the handler method returns |
| // an element (el != null), it is assumed that it has been |
| // added in the stack. Otherwise the character is added |
| // as is to the plaintext buffer. |
| // |
| // For the transition phase, if s != null, it also gets |
| // added in the plaintext buffer. |
| // |
| |
| switch( skip ) |
| { |
| case ELEMENT: |
| m_newLine = false; |
| break; |
| |
| case CHARACTER: |
| m_plainTextBuf.append( (char) ch ); |
| m_newLine = false; |
| break; |
| |
| case IGNORE: |
| default: |
| break; |
| } |
| } |
| |
| closeHeadings(); |
| popElement("domroot"); |
| } |
| |
| private String cleanupSuspectData( final String s ) |
| { |
| final StringBuilder sb = new StringBuilder( s.length() ); |
| |
| for( int i = 0; i < s.length(); i++ ) |
| { |
| final char c = s.charAt(i); |
| |
| if( Verifier.isXMLCharacter( c ) ) sb.append( c ); |
| else sb.append( "0x"+Integer.toString(c,16).toUpperCase() ); |
| } |
| |
| return sb.toString(); |
| } |
| |
/**
 *  Result of {@link #parseToken(int)}: the token is a plain character. The parse loop
 *  appends it to the plain text buffer.
 */
protected static final int CHARACTER = 0;

/**
 *  Result of {@link #parseToken(int)}: the token is a wikimarkup element that the
 *  handler has already dealt with; nothing is appended to the plain text buffer.
 */
protected static final int ELEMENT = 1;

/**
 *  Result of {@link #parseToken(int)}: the token is to be ignored. It is neither added
 *  to the output nor does it end the "start of line" state.
 */
protected static final int IGNORE = 2;
| |
| /** |
| * Return CHARACTER, if you think this was a plain character; ELEMENT, if |
| * you think this was a wiki markup element, and IGNORE, if you think |
| * we should ignore this altogether. |
| * <p> |
| * To add your own MarkupParser, you can override this method, but it |
| * is recommended that you call super.parseToken() as well to gain advantage |
| * of JSPWiki's own markup. You can call it at the start of your own |
| * parseToken() or end - it does not matter. |
| * |
| * @param ch The character under investigation |
| * @return {@link #ELEMENT}, {@link #CHARACTER} or {@link #IGNORE}. |
| * @throws IOException If parsing fails. |
| */ |
| protected int parseToken( final int ch ) |
| throws IOException |
| { |
| Element el = null; |
| |
| // |
| // Now, check the incoming token. |
| // |
| switch( ch ) |
| { |
| case '\r': |
| // DOS linefeeds we forget |
| return IGNORE; |
| |
| case '\n': |
| // |
| // Close things like headings, etc. |
| // |
| |
| // FIXME: This is not really very fast |
| |
| closeHeadings(); |
| |
| popElement("dl"); // Close definition lists. |
| if( m_istable ) |
| { |
| popElement("tr"); |
| } |
| |
| m_isdefinition = false; |
| |
| if( m_newLine ) |
| { |
| // Paragraph change. |
| startBlockLevel(); |
| |
| // |
| // Figure out which elements cannot be enclosed inside |
| // a <p></p> pair according to XHTML rules. |
| // |
| final String nextLine = peekAheadLine(); |
| if( nextLine.length() == 0 || |
| (nextLine.length() > 0 && |
| !nextLine.startsWith("{{{") && |
| !nextLine.startsWith("----") && |
| !nextLine.startsWith("%%") && |
| "*#!;".indexOf( nextLine.charAt(0) ) == -1) ) |
| { |
| pushElement( new Element("p") ); |
| m_isOpenParagraph = true; |
| |
| if( m_restartitalic ) |
| { |
| pushElement( new Element("i") ); |
| m_isitalic = true; |
| m_restartitalic = false; |
| } |
| if( m_restartbold ) |
| { |
| pushElement( new Element("b") ); |
| m_isbold = true; |
| m_restartbold = false; |
| } |
| } |
| } |
| else |
| { |
| m_plainTextBuf.append("\n"); |
| m_newLine = true; |
| } |
| return IGNORE; |
| |
| |
| case '\\': |
| el = handleBackslash(); |
| break; |
| |
| case '_': |
| el = handleUnderscore(); |
| break; |
| |
| case '\'': |
| el = handleApostrophe(); |
| break; |
| |
| case '{': |
| el = handleOpenbrace( m_newLine ); |
| break; |
| |
| case '}': |
| el = handleClosebrace(); |
| break; |
| |
| case '-': |
| if( m_newLine ) |
| el = handleDash(); |
| |
| break; |
| |
| case '!': |
| if( m_newLine ) |
| { |
| el = handleHeading(); |
| } |
| break; |
| |
| case ';': |
| if( m_newLine ) |
| { |
| el = handleDefinitionList(); |
| } |
| break; |
| |
| case ':': |
| if( m_isdefinition ) |
| { |
| popElement("dt"); |
| el = pushElement( new Element("dd") ); |
| m_isdefinition = false; |
| } |
| break; |
| |
| case '[': |
| el = handleOpenbracket(); |
| break; |
| |
| case '*': |
| if( m_newLine ) |
| { |
| pushBack('*'); |
| el = handleGeneralList(); |
| } |
| break; |
| |
| case '#': |
| if( m_newLine ) |
| { |
| pushBack('#'); |
| el = handleGeneralList(); |
| } |
| break; |
| |
| case '|': |
| el = handleBar( m_newLine ); |
| break; |
| |
| case '~': |
| el = handleTilde(); |
| break; |
| |
| case '%': |
| el = handleDiv( m_newLine ); |
| break; |
| |
| case '/': |
| el = handleSlash( m_newLine ); |
| break; |
| |
| default: |
| break; |
| } |
| |
| return el != null ? ELEMENT : CHARACTER; |
| } |
| |
| private void closeHeadings() |
| { |
| if( m_lastHeading != null && !m_wysiwygEditorMode ) |
| { |
| // Add the hash anchor element at the end of the heading |
| addElement( new Element("a").setAttribute( "class",HASHLINK ).setAttribute( "href","#"+m_lastHeading.m_titleAnchor ).setText( "#" ) ); |
| m_lastHeading = null; |
| } |
| popElement("h2"); |
| popElement("h3"); |
| popElement("h4"); |
| } |
| |
| /** |
| * Parses the entire document from the Reader given in the constructor or |
| * set by {@link #setInputReader(Reader)}. |
| * |
| * @return A WikiDocument, ready to be passed to the renderer. |
| * @throws IOException If parsing cannot be accomplished. |
| */ |
| @Override |
| public WikiDocument parse() |
| throws IOException |
| { |
| final WikiDocument d = new WikiDocument( m_context.getPage() ); |
| d.setContext( m_context ); |
| |
| final Element rootElement = new Element("domroot"); |
| |
| d.setRootElement( rootElement ); |
| |
| fillBuffer( rootElement ); |
| |
| paragraphify(rootElement); |
| |
| return d; |
| } |
| |
| /** |
| * Checks out that the first paragraph is correctly installed. |
| * |
| * @param rootElement |
| */ |
| private void paragraphify( final Element rootElement) |
| { |
| // |
| // Add the paragraph tag to the first paragraph |
| // |
| final List< Content > kids = rootElement.getContent(); |
| |
| if( rootElement.getChild("p") != null ) |
| { |
| final ArrayList<Content> ls = new ArrayList<>(); |
| int idxOfFirstContent = 0; |
| int count = 0; |
| |
| for( final Iterator< Content > i = kids.iterator(); i.hasNext(); count++ ) |
| { |
| final Content c = i.next(); |
| if( c instanceof Element ) |
| { |
| final String name = ( ( Element )c ).getName(); |
| if( isBlockLevel( name ) ) break; |
| } |
| |
| if( !(c instanceof ProcessingInstruction) ) |
| { |
| ls.add( c ); |
| if( idxOfFirstContent == 0 ) idxOfFirstContent = count; |
| } |
| } |
| |
| // |
| // If there were any elements, then add a new <p> (unless it would |
| // be an empty one) |
| // |
| if( ls.size() > 0 ) |
| { |
| final Element newel = new Element("p"); |
| |
| for( final Iterator< Content > i = ls.iterator(); i.hasNext(); ) |
| { |
| final Content c = i.next(); |
| |
| c.detach(); |
| newel.addContent(c); |
| } |
| |
| // |
| // Make sure there are no empty <p/> tags added. |
| // |
| if( newel.getTextTrim().length() > 0 || !newel.getChildren().isEmpty() ) |
| rootElement.addContent(idxOfFirstContent, newel); |
| } |
| } |
| } |
| |
| } |