mediawiki leading spaces converter tokenizes the results by default
diff --git a/conf/converter.mediawiki.properties b/conf/converter.mediawiki.properties
index 74ed352..1acf64f 100644
--- a/conf/converter.mediawiki.properties
+++ b/conf/converter.mediawiki.properties
@@ -197,8 +197,6 @@
## want content formatting plugin output, use ContentFormattingTableParser
Mediawiki.1509.table.xmlevent={tag}table,tr,td{class}com.atlassian.uwc.converters.xml.SimpleTableParser
#Mediawiki.1509.table.xmlevent={tag}table,tr,td,th{class}com.atlassian.uwc.converters.xml.ContentFormattingTableParser
-## HTML: Parse the xml document
-Mediawiki.1590.xmlconverter.class=com.atlassian.uwc.converters.xml.XmlConverter
## Leading Spaces -> panel or noformat macros
## Set leading-spaces-noformat property to true if you want the output
## to be noformat lines instead of one big panel macro.
@@ -208,6 +206,8 @@
Mediawiki.1600.leading-spaces-noformat.property=false
#Mediawiki.1600.leading-spaces-delim.property=code
Mediawiki.1600-ws2panel.class=com.atlassian.uwc.converters.mediawiki.LeadingSpacesConverter
+## HTML: Parse the xml document - _after_ the Leading spaces conversion
+Mediawiki.1690.xmlconverter.class=com.atlassian.uwc.converters.xml.XmlConverter
## For any tokenizer regex above, strip out tokens
Mediawiki.2000-detokenize.class=com.atlassian.uwc.converters.DetokenizerConverter
## Do math last, after math tags are detokenized
diff --git a/src/com/atlassian/uwc/converters/BaseConverter.java b/src/com/atlassian/uwc/converters/BaseConverter.java
index 508ce9f..e0d2a2a 100644
--- a/src/com/atlassian/uwc/converters/BaseConverter.java
+++ b/src/com/atlassian/uwc/converters/BaseConverter.java
@@ -4,7 +4,9 @@
import org.apache.log4j.Logger;
+import com.atlassian.uwc.converters.twiki.JavaRegexAndTokenizerConverter;
import com.atlassian.uwc.ui.ConverterErrors;
+import com.atlassian.uwc.ui.Page;
import com.atlassian.uwc.ui.listeners.FeedbackHandler;
/**
@@ -107,4 +109,23 @@
public void setProperties(Properties properties) {
this.properties = properties;
}
+
+ /**
+ * Convenience method for tokenizing content from within a converter: runs a JavaRegexAndTokenizerConverter built from value over input.
+ * @param input content to be searched and replaced
+ * @param value tokenization property value, same as in the properties file. Looks like:
+ * (<math>.*?<\\/math>){replace-multiline-with}$1
+ * @param type text describing the type of token, used only in the debug log message
+ * @return the converted text of the temporary page, i.e. the tokenized content
+ */
+ public String tokenize(String input, String value, String type) {
+ Converter converter = JavaRegexAndTokenizerConverter.getConverter(value);
+ Page page = new Page(null); //temporary page, only used to carry the text through the converter
+ page.setOriginalText(input);
+ converter.convert(page);
+ if (input != null && !input.equals(page.getConvertedText())) { //only log when the conversion changed the text
+ log.debug("Tokenized " + type + " content");
+ }
+ return page.getConvertedText();
+ }
}
diff --git a/src/com/atlassian/uwc/converters/LeadingSpacesBaseConverter.java b/src/com/atlassian/uwc/converters/LeadingSpacesBaseConverter.java
index a3de079..07dff02 100644
--- a/src/com/atlassian/uwc/converters/LeadingSpacesBaseConverter.java
+++ b/src/com/atlassian/uwc/converters/LeadingSpacesBaseConverter.java
@@ -16,7 +16,7 @@
protected String initialspacedelim = " ";
private String newline = "\n";
private String noNewlines = "[^\n]+";
- private String optNoSpace = "([^ ]?)";
+ private String optNoSpace = "(?=[^ ]?)";
private String leadingSpaceLine = "(" + initialspacedelim + noNewlines + newline + ")";
private String manyLeadingSpaceLines = "(" + leadingSpaceLine + "+)";
private String regex = "(?:\n|^)" + manyLeadingSpaceLines + optNoSpace;
@@ -49,8 +49,9 @@
public String getReplacement(String delim, String enddelim) {
return "\n" + delim +
- "\n$1" + enddelim +
- "\n$3";
+ "\n$1" + enddelim +
+ "\n";
+// "\n$3";
}
public String getReplacementLoopUtil(String delim, String enddelim) {
diff --git a/src/com/atlassian/uwc/converters/mediawiki/EscapeBracesConverter.java b/src/com/atlassian/uwc/converters/mediawiki/EscapeBracesConverter.java
index b522d77..5d3d7a3 100644
--- a/src/com/atlassian/uwc/converters/mediawiki/EscapeBracesConverter.java
+++ b/src/com/atlassian/uwc/converters/mediawiki/EscapeBracesConverter.java
@@ -37,16 +37,6 @@
private String tokenize(String input, String value) {
return tokenize(input, value, "");
}
- private String tokenize(String input, String value, String type) {
- Converter converter = JavaRegexAndTokenizerConverter.getConverter(value);
- Page page = new Page(null);
- page.setOriginalText(input);
- converter.convert(page);
- if (input != null && !input.equals(page.getConvertedText())) {
- log.debug("EscapeBraces: tokenized " + type + " content");
- }
- return page.getConvertedText();
- }
protected String tokenizeMath(String input) {
return tokenize(input, "(<math>.*?<\\/math>){replace-multiline-with}$1", "math");
diff --git a/src/com/atlassian/uwc/converters/mediawiki/LeadingSpacesConverter.java b/src/com/atlassian/uwc/converters/mediawiki/LeadingSpacesConverter.java
index 1eea214..9b9aee4 100644
--- a/src/com/atlassian/uwc/converters/mediawiki/LeadingSpacesConverter.java
+++ b/src/com/atlassian/uwc/converters/mediawiki/LeadingSpacesConverter.java
@@ -29,6 +29,8 @@
*
*/
public class LeadingSpacesConverter extends LeadingSpacesBaseConverter {
+ public static final String PROPKEY_TOKENIZE = "leading-spaces-tokenize";
+ private static final String DEFAULTVAL_TOKENIZE = "true";
Logger log = Logger.getLogger(this.getClass());
private Pattern leadingspaces = Pattern.compile("" +
"(?<=\n|^) +[^\n]+");
@@ -61,16 +63,40 @@
log.debug("leading spaces -> panel");
converted = convertLeadingSpacesReplaceAll(input, leadingSpacesPattern, getReplacement());
}
-
+ if (shouldTokenize()) {
+ converted = tokenize(converted, getTokenizeRegex(), "leading spaces");
+ }
page.setConvertedText(converted);
log.debug("Converting Leading Spaces - complete");
}
+
+
+ private String getTokenizeRegex() { //builds the tokenizer property value passed to tokenize()
+ return "(" + //group 1 captures the whole delimited block
+ "\\Q" + getDelimiter() + "\\E" + //opening delimiter, quoted so its braces are matched literally
+ ".*?" + //reluctant match: stop at the first closing delimiter
+ "\\Q" + getDelimiter() + "\\E" + //closing delimiter is the same string as the opening one
+ ")" +
+ "{replace-multiline-with}$1"; //replacement part of the property value: group 1
+ }
+
+
+ protected boolean shouldTokenize() { //true unless the PROPKEY_TOKENIZE property is set to something other than "true"
+ return Boolean.parseBoolean(getProperties().getProperty(PROPKEY_TOKENIZE, DEFAULTVAL_TOKENIZE)); //falls back to DEFAULTVAL_TOKENIZE when the property is unset
+ }
private String getReplacement() {
- String delim = getProperties().getProperty("leading-spaces-delim", "panel");
+ String delim = getDelimiter();
log.debug("Leading spaces replacement delim: " + delim);
- return getReplacement("{"+delim+"}");
+ return getReplacement(delim);
+ }
+
+
+ protected String getDelimiter() { //returns the macro delimiter, braces included
+ String delim = getProperties().getProperty("leading-spaces-delim", "panel"); //macro name; "panel" when the property is unset
+ delim = "{"+delim+"}"; //wrap the name in braces, e.g. panel -> {panel}
+ return delim;
}
}
diff --git a/src/com/atlassian/uwc/converters/mediawiki/LeadingSpacesConverterTest.java b/src/com/atlassian/uwc/converters/mediawiki/LeadingSpacesConverterTest.java
index ab349ca..f8d571c 100644
--- a/src/com/atlassian/uwc/converters/mediawiki/LeadingSpacesConverterTest.java
+++ b/src/com/atlassian/uwc/converters/mediawiki/LeadingSpacesConverterTest.java
@@ -1,6 +1,7 @@
package com.atlassian.uwc.converters.mediawiki;
import java.util.Properties;
+import java.util.Stack;
import junit.framework.TestCase;
@@ -9,6 +10,7 @@
import com.atlassian.uwc.converters.LeadingSpacesBaseConverter;
import com.atlassian.uwc.ui.Page;
+import com.atlassian.uwc.util.TokenMap;
public class LeadingSpacesConverterTest extends TestCase {
@@ -17,6 +19,10 @@
protected void setUp() throws Exception {
tester = new LeadingSpacesConverter();
PropertyConfigurator.configure("log4j.properties");
+ Properties properties = new Properties();
+ //old behavior for purposes of not having to update the tests
+ properties.setProperty(LeadingSpacesConverter.PROPKEY_TOKENIZE, "false");
+ tester.setProperties(properties);
}
public void testConvertPage() {
@@ -40,7 +46,7 @@
}
public void testConvertPage_codedelim() {
- Properties props = new Properties();
+ Properties props = tester.getProperties();
props.setProperty("leading-spaces-delim", "code");
tester.setProperties(props);
String input, expected, actual;
@@ -64,7 +70,7 @@
public void testConvertPage_startsatbeginning() {
- Properties props = new Properties();
+ Properties props = tester.getProperties();
props.setProperty("leading-spaces-delim", "code");
tester.setProperties(props);
String input, expected, actual;
@@ -101,7 +107,7 @@
}
public void testIgnoreListsWithBold() {
- Properties props = new Properties();
+ Properties props = tester.getProperties();
props.setProperty("leading-spaces-delim", "code");
props.setProperty("leading-spaces-noformat", "false");
tester.setProperties(props);
@@ -122,4 +128,77 @@
assertEquals(expected, actual);
}
+ public void testConvertPage_tokenized() { //with tokenizing on, the converted block is replaced by a token that detokenizes back to the {code} block
+ tester.getProperties().clear(); //back to defaults, which is the same as: props.setProperty(LeadingSpacesConverter.PROPKEY_TOKENIZE, "true");
+ Properties props = tester.getProperties();
+ props.setProperty("leading-spaces-delim", "code");
+
+ tester.setProperties(props);
+ String input, expected, actual;
+ input = " abc\n" +
+ " def\n";
+ String startswith = "\n" +
+ "~UWCTOKENSTART~";
+ String endswith = "~UWCTOKENEND~\n";
+ Page page = new Page(null);
+ page.setOriginalText(input);
+ page.setConvertedText(input);
+ tester.convert(page);
+ actual = page.getConvertedText();
+ assertNotNull(actual);
+ assertTrue(actual, actual.startsWith(startswith));
+ assertTrue(actual, actual.endsWith(endswith));
+ //detokenizing the result must give back the plain {code} block
+ expected = "\n" +
+ "{code}\n" +
+ " abc\n" +
+ " def\n" +
+ "{code}\n";
+ Stack<String> keys = TokenMap.getKeys();
+ assertNotNull(keys);
+ assertFalse(keys.isEmpty());
+ String detokenizeText = TokenMap.detokenizeText(actual);
+ assertNotNull(detokenizeText); //check the detokenized value; actual was already checked above
+ assertEquals(expected, detokenizeText);
+ }
+ public void testConvertPage_problem1() { //several leading-space groups separated by an empty line and by a line holding only a space
+ Properties props = tester.getProperties(); //reuses the properties from setUp
+ props.setProperty("leading-spaces-delim", "code");
+ tester.setProperties(props);
+ String input, expected, actual;
+ input = "Tralala\n" +
+ "\n" +
+ " asldkjas: http://lakdjlaskjd/\n" + //first group of leading-space lines
+ " aslkdjasd: http://amsdkjahsd/\n" +
+ "\n" +
+ " <problem>\n" + //second group: a single line
+ " \n" + //line containing only one space
+ " <ok>\n" + //third group
+ " </ok>\n" +
+ "";
+ expected = "Tralala\n" +
+ "\n" +
+ "{code}\n" + //each group is expected in its own {code} block
+ " asldkjas: http://lakdjlaskjd/\n" +
+ " aslkdjasd: http://amsdkjahsd/\n" +
+ "{code}\n" +
+ "\n" +
+ "{code}\n" +
+ " <problem>\n" +
+ "{code}\n" +
+ " \n" + //the space-only line is expected to stay outside the {code} blocks
+ "{code}\n" +
+ " <ok>\n" +
+ " </ok>\n" +
+ "{code}\n" +
+ "";
+ Page page = new Page(null);
+ page.setOriginalText(input);
+ page.setConvertedText(input);
+ tester.convert(page);
+ actual = page.getConvertedText();
+ assertNotNull(actual);
+ assertEquals(expected, actual);
+ }
+
}