## blob: 5763604d04692e997c47ac1a2c40825b43051e6b [file] [log] [blame]
Mediawiki.0001.allow-tilde-in-links.property=true
Mediawiki.0002.illegalnames-urldecode.property=false
Mediawiki.0003.xmlevents.property=com.atlassian.uwc.converters.xml.DefaultXmlEvents
Mediawiki.0003.xml-fragments.property=true
Mediawiki.0003.xml-fragments-root.property=html
## Html Tidy - Turn this option on to automatically correct malformed html/xml
#Mediawiki.0003.xml-use-htmltidy.property=true
## Html Tidy Options
## - set JTidy options using the following naming convention:
## - xml-tidyopt-key.property=val
## - where key is any supported JTidy key listed here:
## http://www.docjar.com/docs/api/org/w3c/tidy/Tidy.html
## but using the property naming convention listed here:
## http://tidy.sourceforge.net/docs/quickref.html
## - and value is one of the options described here:
## http://tidy.sourceforge.net/docs/quickref.html
## - Example. To turn on numeric-entities, uncomment:
#Mediawiki.0003.xml-tidyopt-numeric-entities.property=true
## User Date Metadata - Uncomment these if you want the UWC to convert
## optionally exportable user and timestamp metadata.
## Change the users-must-exist property to false if you do not want the
## framework to check whether the username is an existing Confluence user.
## Requires the UDMF Plugin: https://studio.plugins.atlassian.com/wiki/display/udmf
## If you need to get rid of user/timestamp macros, but don't want the
## user/timestamp data actually associated, set userdate-disabled to true.
#Mediawiki.0004.userdate.class=com.atlassian.uwc.converters.mediawiki.UserDateConverter
#Mediawiki.0004.users-must-exist.property=true
#Mediawiki.0004.userdate-disabled.property=false
Mediawiki.0004.tokenize-math.java-regex-tokenizer=(^|\n):+((?s)<math>.*?<\/math>){replace-with}$1\n$2
Mediawiki.0005.tokenize-math.java-regex-tokenizer=((?s)<math>.*?<\/math>){replace-with}$1
## Hierarchy
## If your hierarchy can be represented in the page content, uncomment and
## configure the 0006 properties to maintain your hierarchy data.
## CONFIGURE:
## * content-hierarchy-pattern - this is the regex that will find your hierarchy
## data. The contents of group 1 in this regex should list the current page's
## ancestors.
## * content-hierarchy-pattern-includes-current -
## if true, content-hierarchy-pattern will be expected to list the current
## page title as well. If false, only the ancestors will be expected.
## * content-hierarchy-delim - the delimiter that separates the ancestors in
## the ancestor string found by content-hierarchy-pattern
## * content-hierarchy-default-root - The pagename of the parent page for
## pages with no ancestor. If blank, or a nonexistent pagename, the page will
## be given no parent, i.e. will be an orphan page, and sibling to Home
## * content-hierarchy-setname - If true, the origtitle data will be used
## to set the title. Set to true when setting char encoding.
## * remove-content - If you want the hierarchy content to be scrubbed from
## the page content, set the remove-content regex to the same value as
## content-hierarchy-pattern. Remember to leave the {replace-with} at the end.
#Mediawiki.0006.switch.hierarchy-builder=UseBuilder
#MediaWiki.0006.classname.hierarchy-builder=com.atlassian.uwc.hierarchies.ContentHierarchy
#Mediawiki.0006.content-hierarchy-pattern.property=\{orig-title:([^}]*)\}
#Mediawiki.0006.content-hierarchy-pattern-includes-current.property=true
#Mediawiki.0006.content-hierarchy-delim.property=/
#MediaWiki.0006.content-hierarchy-default-root.property=Home
#MediaWiki.0006.content-hierarchy-setname.property=false
#MediaWiki.0006.remove-content.java-regex=\{orig-title:([^}]*)\}{replace-with}
Mediawiki.0009-escapebraces.class=com.atlassian.uwc.converters.mediawiki.EscapeBracesConverter
Mediawiki.0010-win-newline-cleaner.class=com.atlassian.uwc.converters.tikiwiki.WinNewlinesConverter
Mediawiki.0014-tokenizedollars.java-regex-tokenizer=([$]){replace-with}$1
## Page Histories - if you exported with page history options turned on,
## uncomment and configure these options. See
## http://confluence.atlassian.com/display/CONFEXT/UWC+Page+History+Framework
#Mediawiki.0050.switch.page-history-preservation=true
#Mediawiki.0051.suffix.page-history-preservation=-#.txt
## Code, Pre, and Leading Spaces
Mediawiki.0090-re_pre.java-regex-tokenizer=\<pre\>((?s).*?)\<\/pre\>{replace-with}{code}$1{code}
Mediawiki.0095-re_code.java-regex-tokenizer=\<code\>((?s).*?)\<\/code\>{replace-with}{code}$1{code}
## Leading Spaces -> panel or noformat macros
## Set the leading-spaces-noformat property to true if you want the output
## to be noformat lines instead of one big panel macro.
## Note: using noformat will be more efficient.
## Set the leading-spaces-delim to 'code', 'noformat', or 'panel'. To use
## leading-spaces-delim the noformat property must be false.
Mediawiki.0096.leading-spaces-noformat.property=false
Mediawiki.0096.leading-spaces-delim.property=code
Mediawiki.0096.ws2panel.class=com.atlassian.uwc.converters.mediawiki.LeadingSpacesConverter
## requires the Table of Contents Macro, tested with version 1.4.7,
## http://www.randombits.org/display/CONF/Table+of+Contents+Plugin
Mediawiki.0100-toc.java-regex=__TOC__{replace-with}{toc:outline=true|printable=false|style=none|indent=20px}
Mediawiki.0300-re_bold_italics.java-regex='{5}(.*?)'{5}{replace-with}*_$1_*
Mediawiki.0330-multiline-edge1.java-regex=''+\r?\n(=+.*?=+\r?\n)''+{replace-with}$1
Mediawiki.0340-re_multiline.class=com.atlassian.uwc.converters.mediawiki.MultilineBasicConverter
Mediawiki.0350-re_bold.java-regex='{3}\s*(.*?)\s*'{3}{replace-with}*$1*
Mediawiki.0350-re_italics.java-regex='{2}\s*(.*?)\s*'{2}{replace-with}_$1_
Mediawiki.0355-removehtmlcomments.java-regex=<!--.*?-->{replace-with}
## Spans need to be cleared before tables
#Mediawiki.0356.span-color.java-regex=<span style=\"color:([^\"]+)\">(.*?)<\/span>{replace-with}{color:$1}$2{color}
#Mediawiki.0357.span-clear.java-regex=<span[^>]+>(.*?)<\/span>{replace-with}$1
## Tables
## Output can be configured with the tableoutput property. Options are
## "confluence" and "contentformatting", which refer to the default confluence
## table syntax and the Content Formatting Plugin's table syntax, respectively.
## Note: switch the 1509 table parser option, if html tables were used as well
#Mediawiki.0359.enforce-table-columns-minimum.class=com.atlassian.uwc.converters.mediawiki.EnforceTableMinimumConverter
Mediawiki.0360.tableoutput.property=confluence
Mediawiki.0360-re_tables.class=com.atlassian.uwc.converters.mediawiki.TableConverter
Mediawiki.0361-cleannestedtables.java-regex=(\{panel\}\s*)\|\\\}{replace-with}$1
Mediawiki.0362-re_noformat.java-regex=<nowiki>((?s).*?)<\/nowiki>{replace-with}{noformat}$1{noformat}
Mediawiki.0365.tokenize-exclamation.java-regex-tokenizer=([!]){replace-with}\\!
## Handle redirect syntax before link syntax - you'll need to install the
## Redirection Plugin to use this converter
## http://www.customware.net/repository/display/AtlassianPlugins/Redirection+Plugin
#Mediawiki.0368.redirect.class=com.atlassian.uwc.converters.mediawiki.RedirectConverter
## NOTE: Images must come after tables or the whitespace gets screwed up if a table has images
## Images must come before Links or Alias handling will make Image conversion more complicated
Mediawiki.0370-re_images.class=com.atlassian.uwc.converters.mediawiki.ImageConverter
Mediawiki.0400-categories2labels.class=com.atlassian.uwc.converters.mediawiki.CategoryConverter
Mediawiki.0401-re_links_no_categories.java-regex=((?i)\[\[(category:[^\]]+)\]\]\s*)+{replace-with}
Mediawiki.0402-re_bild_to_images.java-regex=(?i)\[\[Bild:\s*([^\]\|\s]+)\s*\]\]{replace-with}!$1!
Mediawiki.0402-re_file_to_images.java-regex=(?i)\[\[file:\s*([^\]\|\s]+)\s*\]\]{replace-with}!$1!
Mediawiki.0403-re_links_to_images.java-regex=(?i)\[\[:?Media:\s*([^\]]+)\]\]{replace-with}[[^$1]]
## 404 and 405 are wikipedia interwiki links. See uwc-296.
## Uncomment to have [[wikipedia:tags]] become [tags@wikipedia].
## You'll need to add the appropriate Confluence shortcut link
## 404 and 405 must be before the namespace cleaner.
#Mediawiki.0404.allow-at-in-links.property=true
#Mediawiki.0405.wikipedia.java-regex=\[wikipedia:([^|\]]+){replace-with}[$1@wikipedia
Mediawiki.0406-mailto.java-regex=(?i)(?<=\[)(mailto:[^\] ]*) +([^\]]*){replace-with}$2|$1
Mediawiki.0407-re_links_colons.class=com.atlassian.uwc.converters.mediawiki.NamespaceCleaner
Mediawiki.0408-re_links-ws1.java-regex=\[\s+([^\]]+?)\]{replace-with}[$1]
Mediawiki.0409-re_links-ws2.java-regex=\[([^\]]+?)\s+\]{replace-with}[$1]
Mediawiki.0410-re_links.java-regex=\[\[([^|\]]*)\]\]{replace-with}[$1]
Mediawiki.0420-re_links_alias.java-regex=\[\[([^|]*)\| *([^\]]*)\]\]{replace-with}[$2|$1]
Mediawiki.0421-re_links_ws.java-regex=(\[[^|\]]+\|[^\]]+) (\]){replace-with}$1$2
Mediawiki.0430-re_links_external_alias.java-regex=\[((?:(?:https?)|(?:file)):\/\/\S+) ([^\]]*)\]{replace-with}[$2|$1]
Mediawiki.0500-re_h4.java-regex=(?s)={5}\s*(.*?)\s*={5}{replace-with}h4. $1
Mediawiki.0510-re_h3.java-regex=(?s)={4}\s*(.*?)\s*={4}{replace-with}h3. $1
Mediawiki.0520-re_h2.java-regex=(?s)={3}\s*(.*?)\s*={3}{replace-with}h2. $1
Mediawiki.0530-re_h1.java-regex=(?s)={2}\s*(.*?)\s*={2}{replace-with}h1. $1
Mediawiki.0540-re_title.java-regex=(^|\n)={1}([^=]+)={1}(\n|$){replace-with}$1h1. $2$3
#Mediawiki.0710-images_ws2underscore.class=com.atlassian.uwc.converters.mediawiki.ImageWhitespaceConverter
## Set the external-internal-links-identifier to your mediawiki's domain, if
## your users might have included links to raw internal mediawiki urls
## example: [http://mymediawiki.org/index.php/Some_Mediawiki_Page Link]
#Mediawiki.0790.external-internal-links.class=com.atlassian.uwc.converters.mediawiki.ExternalInternalLinksConverter
#Mediawiki.0790.external-internal-links-identifier.property=https?:\/\/wiki.someplace.com\/
Mediawiki.0800-attachments.class=com.atlassian.uwc.converters.mediawiki.AttachmentConverter
Mediawiki.0910-linebreaks.class=com.atlassian.uwc.converters.mediawiki.BreakConverter
## Lists
Mediawiki.0950-lists.java-regex=(^|\n)([*#]+)([^*#\s])([^*\n]*)(?=\n){replace-with}$1$2 $3$4
Mediawiki.0955-lists-w-bold.java-regex=(^|\n)([*#]*)([*])(\s)([^*\n]*?)( *[*]){replace-with}$1$2$4*$5*
Mediawiki.0960-definitionlists.class=com.atlassian.uwc.converters.mediawiki.DefinitionList
Mediawiki.0961-definitionlists-nodefinition.java-regex=(?<=^|\n);([^\n]+){replace-with}* $1
Mediawiki.0970-indenting.java-regex=(^|\n):+(.*){replace-with}$1$2
## SubPageList3
## If you used the SubPageList3 <splist ...> tag syntax to automatically
## list children, uncomment this converter to have them transformed to
## confluence children macro
#Mediawiki.0980-subpagelist3-children.class=com.atlassian.uwc.converters.mediawiki.SubpagelistConverter
## Discussion Pages -> Comments
## Use the delim properties to separate the Discussion page into distinct
## comments. Here are 3 examples based on wikipedia discussion pages.
## Tell the CommentConverter where the Discussion pages are in relation
## to the Pages by setting a relative directory in the location property
## For more info see UWC Mediawiki Notes - Comments section
#Mediawiki.1000.discussionpages2comments.class=com.atlassian.uwc.converters.mediawiki.CommentConverter
Mediawiki.1000.discussion-delim-end-1.property=\[\[User.*?UTC\)[^\n]*
Mediawiki.1000.discussion-delim-start-2.property=\n[=]
Mediawiki.1000.discussion-delim-start-3.property=\n[----]
Mediawiki.1000.discussion-location.property=../Discussions/
## Filenames
## strip out filename extensions when importing
Mediawiki.1010-remove-extension.class=com.atlassian.uwc.converters.ChopPageExtensionsConverter
## replace single _ with space
Mediawiki.1020-underscore2space.class=com.atlassian.uwc.converters.mediawiki.ConvertUnderscoresInTitle
## set this property to true if you want underscores in links to be translated to spaces. See UWC-291
Mediawiki.1021.underscore2space-links.property=false
## HTML
## HTML: prep for the sax parser
Mediawiki.1400.amp-entity.java-regex=[&](?![#a-zA-Z0-9]{2,5};){replace-with}&amp;
Mediawiki.1410.tokenize-math-again.java-regex-tokenizer=(^|\n):+((?s)<math>.*?<\/math>){replace-with}$1\n$2
Mediawiki.1411.tokenize-math-again.java-regex-tokenizer=((?s)<math>.*?<\/math>){replace-with}$1
## HTML: If you are getting sax errors complaining about namespaces that aren't
## bound, add converters like 1420 and 1421, to remove refs to them:
## where, t = the first letter of the tags that are having the problem
## and, x = the namespace that isn't bound
## 1420 handles an attribute with no value. Ex: x:abc
## 1421 handles an attribute with a value. Ex: x:foo="bar"
#Mediawiki.1420.unbound-namespace-noval.java-regex=<(t[^ >]* )[^>]*?x:[^ >]*{replace-with}<$1
#Mediawiki.1421.unbound-namespace-hasval.java-regex=<(t[^ >]* )[^>]*?(x:[^">]*"[^">]*"\s*)+{replace-with}<$1
## Alternative to XmlConverter for some unnested html syntaxes
#Mediawiki.1470.optionalunnestedhtml-bold.java-regex=<\/?b>{replace-with}*
#Mediawiki.1471.optionalunnestedhtml-ital.java-regex=<\/?i>{replace-with}_
#Mediawiki.1472.optionalunnestedhtml-strike.java-regex=<\/?strike>{replace-with}-
#Mediawiki.1473.optionalunnestedhtml-tt.java-regex=<tt>(.*?)</tt>{replace-with}{{$1}}
#Mediawiki.1474.optionalunnestedhtml-strong.java-regex=<\/?strong>{replace-with}*
#Mediawiki.1475.optionalunnestedhtml-u.java-regex=<\/?u>{replace-with}+
#Mediawiki.1476.optionalunnestedhtml-header.java-regex=<h(\d)>(.*?)</h\d>{replace-with}h$1. $2
#Mediawiki.1477.optionalunnestedhtml-em.java-regex=<\/?em>{replace-with}_
#Mediawiki.1478.optionalunnestedxml-graphiz.java-regex-tokenizer=<\/?graphviz[^>]*>{replace-with}{graphviz}
#Mediawiki.1479.optionalunnestedxml-emptycell.java-regex=> (<\/td>){replace-with}>&nbsp;&nbsp;$1
#Mediawiki.1480.optionalunnestedxml-tablenoparams.java-regex=<\/?((?:table)|(?:tr)|(?:td))>{replace-with}{$1}
#Mediawiki.1481.optionalunnestedxml-tableparams.class=com.atlassian.uwc.converters.mediawiki.UnnestedTableHtmlParams
#Mediawiki.1482.optionalunnestedxml-hidetable.java-regex-tokenizer=(?s)(\{table.*?\{table\}){replace-with}$1
## HTML: set up the parser events
Mediawiki.1501.bold.xmlevent={tag}b{class}com.atlassian.uwc.converters.xml.example.BoldParser
Mediawiki.1501.strong.xmlevent={tag}strong{class}com.atlassian.uwc.converters.xml.example.BoldParser
Mediawiki.1502.italic.xmlevent={tag}i{class}com.atlassian.uwc.converters.xml.ItalicParser
Mediawiki.1503.emph.xmlevent={tag}em{class}com.atlassian.uwc.converters.xml.ItalicParser
Mediawiki.1504.underline.xmlevent={tag}u{class}com.atlassian.uwc.converters.xml.UnderlineParser
Mediawiki.1505.monospace.xmlevent={tag}tt{class}com.atlassian.uwc.converters.xml.MonoParser
Mediawiki.1506.header.xmlevent={tag}h1, h2, h3, h4, h5, h6{class}com.atlassian.uwc.converters.xml.HeaderParser
Mediawiki.1507.lists.xmlevent={tag}ol,ul,li{class}com.atlassian.uwc.converters.xml.ListParser
Mediawiki.1508.horizrule.xmlevent={tag}hr{class}com.atlassian.uwc.converters.xml.HorizRuleParser
## If you want confluence tables output, use SimpleTableParser, or if you
## want content formatting plugin output, use ContentFormattingTableParser
Mediawiki.1509.table.xmlevent={tag}table,tr,td{class}com.atlassian.uwc.converters.xml.SimpleTableParser
#Mediawiki.1509.table.xmlevent={tag}table,tr,td,th{class}com.atlassian.uwc.converters.xml.ContentFormattingTableParser
## HTML: Parse the xml document - _after_ the Leading spaces conversion
Mediawiki.1690.xmlconverter.class=com.atlassian.uwc.converters.xml.XmlConverter
## For any tokenizer regex above, strip out tokens
Mediawiki.2000-detokenize.class=com.atlassian.uwc.converters.DetokenizerConverter
## Do math last, after math tags are detokenized
Mediawiki.2100-math.class=com.atlassian.uwc.converters.mediawiki.MathConverter