Add improved support for converting MediaWiki <math> markup to LaTeX math (block and inline equations)
diff --git a/conf/converter.mediawiki.properties b/conf/converter.mediawiki.properties
index 414db99..5763604 100644
--- a/conf/converter.mediawiki.properties
+++ b/conf/converter.mediawiki.properties
@@ -26,6 +26,7 @@
#Mediawiki.0004.userdate.class=com.atlassian.uwc.converters.mediawiki.UserDateConverter
#Mediawiki.0004.users-must-exist.property=true
#Mediawiki.0004.userdate-disabled.property=false
+Mediawiki.0004.tokenize-math.java-regex-tokenizer=(^|\n):+((?s)<math>.*?<\/math>){replace-with}$1\n$2
Mediawiki.0005.tokenize-math.java-regex-tokenizer=((?s)<math>.*?<\/math>){replace-with}$1
## Hierarchy
## If your hierarchy can be represented in the page content, uncomment and
@@ -171,7 +172,8 @@
## HTML
## HTML: prep for the sax parser
Mediawiki.1400.amp-entity.java-regex=[&](?![#a-zA-Z0-9]{2,5};){replace-with}&
-Mediawiki.1410.tokenize-math-again.java-regex-tokenizer=((?s)<math>.*?<\/math>){replace-with}$1
+Mediawiki.1410.tokenize-math-again.java-regex-tokenizer=(^|\n):+((?s)<math>.*?<\/math>){replace-with}$1\n$2
+Mediawiki.1411.tokenize-math-again.java-regex-tokenizer=((?s)<math>.*?<\/math>){replace-with}$1
## HTML: If you are getting sax errors complaining about namespaces that aren't
## bound, add converters like 1420 and 1421, to remove refs to them:
## where, t = the first letter of the tags that are having the problem
diff --git a/sampleData/mediawiki/SampleMediawiki-ExpectedMath.txt b/sampleData/mediawiki/SampleMediawiki-ExpectedMath.txt
index fcfd16a..479d986 100644
--- a/sampleData/mediawiki/SampleMediawiki-ExpectedMath.txt
+++ b/sampleData/mediawiki/SampleMediawiki-ExpectedMath.txt
@@ -1,33 +1,22 @@
This sample is for testing math syntax
-{latex}
-\begin{eqnarray}
-{
-g(x,y)\,
-}
-\end{eqnarray}
-{latex}
+Some block equations:
-{latex}
+{mathblock}
\begin{eqnarray}
-{
-f(x) = \int_0^1 e^{-t} g(t) \, dt.
-}
+g(x,y)\,
\end{eqnarray}
-{latex}
+{mathblock}
+
+{mathblock:anchor=test}
+\begin{eqnarray}
+f(x) = \int_0^1 e^{-t} g(t) \, dt.
+\end{eqnarray}
+{mathblock}
+
+Some inline equations:
+{mathinline}E=mc^2{mathinline} is a famous equation. {mathinline}\pi{mathinline} is a famous number.
Escaping percents:
-{latex}
-\begin{eqnarray}
-{
-A_{B} = 100\% - C_{D}
-}
-\end{eqnarray}
-{latex}
-{latex}
-\begin{eqnarray}
-{
-A_{B} = 100\% - 200\% + C_{D}
-}
-\end{eqnarray}
-{latex}
+{mathinline}A_{B} = 100\% - C_{D}{mathinline}
+{mathinline}A_{B} = 100\% - 200\% + C_{D}{mathinline}
diff --git a/sampleData/mediawiki/SampleMediawiki-InputMath.txt b/sampleData/mediawiki/SampleMediawiki-InputMath.txt
index 7a5ca68..c49cbf3 100644
--- a/sampleData/mediawiki/SampleMediawiki-InputMath.txt
+++ b/sampleData/mediawiki/SampleMediawiki-InputMath.txt
@@ -1,8 +1,12 @@
This sample is for testing math syntax
-<math>g(x,y)\,</math>
+Some block equations:
+:<math>g(x,y)\,</math>
-<math>f(x) = \int_0^1 e^{-t} g(t) \, dt.</math>
+<math>\label{test} f(x) = \int_0^1 e^{-t} g(t) \, dt.</math>
+
+Some inline equations:
+<math>E=mc^2</math> is a famous equation. <math>\pi</math> is a famous number.
Escaping percents:
<math>A_{B} = 100% - C_{D}</math>
diff --git a/src/com/atlassian/uwc/converters/mediawiki/MathConverter.java b/src/com/atlassian/uwc/converters/mediawiki/MathConverter.java
index 7aef87e..8dfee77 100644
--- a/src/com/atlassian/uwc/converters/mediawiki/MathConverter.java
+++ b/src/com/atlassian/uwc/converters/mediawiki/MathConverter.java
@@ -15,7 +15,8 @@
page.setConvertedText(converted);
}
- Pattern math = Pattern.compile("<math>(.*?)<\\/math>", Pattern.DOTALL);
+ Pattern math = Pattern.compile("<math>(.*?)<\\/math>(?=(..|.$|$))", Pattern.DOTALL);
+ Pattern label = Pattern.compile("\\\\label{(\\w+)}", Pattern.DOTALL);
protected String convertMath(String input) {
Matcher mathFinder = math.matcher(input);
StringBuffer sb = new StringBuffer();
@@ -24,11 +25,41 @@
found = true;
String mathbits = mathFinder.group(1);
mathbits = escapePercents(mathbits);
- String replacement = "{latex}\n" +
- "\\begin{eqnarray}\n" +
- "{\n" + mathbits +"\n}\n" +
- "\\end{eqnarray}\n" +
- "{latex}";
+
+ int len = sb.length();
+ boolean newlinesBefore = (len==0) ||
+ ((len==1) && (sb.substring(0,1).equals("\n")))
+ || (sb.substring(len-2,len).equals("\n\n"));
+
+ String after = mathFinder.group(2);
+ len = after.length();
+ boolean newlinesAfter = (len==0) ||
+ ((len==1) && (after.equals("\n")))
+ || (after.equals("\n\n"));
+
+ String replacement;
+ if (newlinesBefore && newlinesAfter) {
+ // Looks like a mathblock
+
+ String anchor = "";
+ Matcher labelFinder = label.matcher(mathbits);
+ if (labelFinder.find()) {
+ anchor = labelFinder.group(1).trim();
+ replacement = labelFinder.replaceAll(" ");
+ }
+ if (!anchor.isEmpty()) {
+ anchor = ":anchor="+anchor;
+ }
+
+ replacement = "{mathblock"+anchor+"}\n" +
+ "\\begin{eqnarray}\n" +
+ mathbits +"\n" +
+ "\\end{eqnarray}\n" +
+ "{mathblock}";
+ } else {
+ replacement = "{mathinline}" + mathbits + "{mathinline}";
+ }
+
replacement = RegexUtil.handleEscapesInReplacement(replacement);
mathFinder.appendReplacement(sb, replacement);
}
diff --git a/src/com/atlassian/uwc/converters/mediawiki/MathConverterTest.java b/src/com/atlassian/uwc/converters/mediawiki/MathConverterTest.java
index 18095fd..d2062b0 100644
--- a/src/com/atlassian/uwc/converters/mediawiki/MathConverterTest.java
+++ b/src/com/atlassian/uwc/converters/mediawiki/MathConverterTest.java
@@ -16,36 +16,26 @@
public void testConvertMath() {
String input, expected, actual;
- input = "<math>g(x,y)\\,</math>\n" +
+ input = "<math>\\label{test} g(x,y)\\,</math>\n" +
"\n" +
- "<math>f(x) = \\int_0^1 e^{-t} g(t) \\, dt.</math>\n" +
+ "Inline math: <math>f(x) = \\int_0^1 e^{-t} g(t) \\, dt.</math>\n" +
"\n" +
"<math>A_{B} = 100% - 200% + C_{D}</math>\n" +
"\n" +
"";
- expected = "{latex}\n" +
- "\\begin{eqnarray}\n" +
- "{\n" +
- "g(x,y)\\,\n" +
- "}\n" +
+ expected = "{mathblock:anchor=test}\n" +
+ "\\begin{eqnarray}\n" +
+ "g(x,y)\\,\n" +
"\\end{eqnarray}\n" +
- "{latex}\n" +
+ "{mathblock}\n" +
"\n" +
- "{latex}\n" +
- "\\begin{eqnarray}\n" +
- "{\n" +
- "f(x) = \\int_0^1 e^{-t} g(t) \\, dt.\n" +
- "}\n" +
- "\\end{eqnarray}\n" +
- "{latex}\n" +
+ "Inline math: {mathinline}f(x) = \\int_0^1 e^{-t} g(t) \\, dt.{mathinline}\n" +
"\n" +
- "{latex}\n" +
- "\\begin{eqnarray}\n" +
- "{\n" +
- "A_{B} = 100\\% - 200\\% + C_{D}\n" +
- "}\n" +
+ "{mathblock}\n" +
+ "\\begin{eqnarray}\n" +
+ "A_{B} = 100\\% - 200\\% + C_{D}\n" +
"\\end{eqnarray}\n" +
- "{latex}\n" +
+ "{mathblock}\n" +
"\n";
actual = tester.convertMath(input);
assertNotNull(actual);