MIME4J-328 Fix DecoderUtil split point (#101)
Huge thanks to Chung dae hyun
diff --git a/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java b/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java
index 14a981d..5526b8f 100644
--- a/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java
+++ b/core/src/main/java/org/apache/james/mime4j/codec/DecoderUtil.java
@@ -19,6 +19,7 @@
package org.apache.james.mime4j.codec;
+import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.lang.ref.SoftReference;
@@ -28,7 +29,6 @@
import org.apache.james.mime4j.io.InputStreams;
import org.apache.james.mime4j.util.BufferRecycler;
-import org.apache.james.mime4j.util.ByteArrayBuffer;
import org.apache.james.mime4j.util.CharsetUtil;
import org.apache.james.mime4j.util.RecycledByteArrayBuffer;
@@ -122,6 +122,10 @@
return new String(decodedBytes, charset);
}
+    /**
+     * Decodes the 'B'-encoded (base64) text of an encoded word into raw bytes.
+     * Unlike {@code decodeB}, no charset conversion is applied here; the caller
+     * decides when and with which charset the bytes become a {@code String}.
+     *
+     * @param encodedText the encoded text to decode.
+     * @param monitor the monitor handed through to {@code decodeBase64}.
+     * @return the bytes returned by {@code decodeBase64}.
+     * @throws UnsupportedEncodingException declared for symmetry with
+     *         {@code decodeB}; callers already handle it.
+     */
+    static byte[] decodeByteAryB(String encodedText, DecodeMonitor monitor) throws UnsupportedEncodingException {
+        final byte[] rawBytes = decodeBase64(encodedText, monitor);
+        return rawBytes;
+    }
+
/**
* Decodes an encoded text encoded with the 'Q' encoding (described in
* RFC 2047) found in a header field body.
@@ -140,6 +144,10 @@
return new String(decodedBytes, charset);
}
+    /**
+     * Decodes the 'Q'-encoded text of an encoded word into raw bytes.
+     * The text is first passed through {@code replaceUnderscores} (presumably
+     * the RFC 2047 handling of '_' -- confirm against that helper) and then
+     * through {@code decodeQuotedPrintable}. No charset conversion is applied.
+     *
+     * @param encodedText the encoded text to decode.
+     * @param monitor the monitor handed through to {@code decodeQuotedPrintable}.
+     * @return the bytes returned by {@code decodeQuotedPrintable}.
+     * @throws UnsupportedEncodingException declared for symmetry with
+     *         {@code decodeQ}; callers already handle it.
+     */
+    static byte[] decodeByteAryQ(String encodedText, DecodeMonitor monitor) throws UnsupportedEncodingException {
+        final String withoutUnderscores = replaceUnderscores(encodedText);
+        return decodeQuotedPrintable(withoutUnderscores, monitor);
+    }
+
static String decodeEncodedWords(String body) {
return decodeEncodedWords(body, DecodeMonitor.SILENT);
}
@@ -208,8 +216,11 @@
Map<Charset, Charset> charsetOverrides)
throws IllegalArgumentException {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
StringBuilder sb = new StringBuilder();
int position = 0;
+ String mimeCharset ="";
+ String encoding ="";
while (position < body.length()) {
int startPattern = body.indexOf("=?", position);
@@ -217,6 +228,7 @@
if (position == 0) {
return body;
}
+ appendStringBuffer(fallback, charsetOverrides, out, sb, mimeCharset);
sb.append(body, position, body.length());
break;
}
@@ -227,29 +239,48 @@
if (charsetEnd < 0 || encodingEnd < 0 || encodedTextEnd < 0) {
// Invalid pattern
+ appendStringBuffer(fallback, charsetOverrides, out, sb, mimeCharset);
sb.append(body, position, startPattern + 2);
position = startPattern + 2;
} else if (encodingEnd == encodedTextEnd) {
+ appendStringBuffer(fallback, charsetOverrides, out, sb, mimeCharset);
sb.append(body, position, Math.min(encodedTextEnd + 2, body.length()));
position = encodedTextEnd +2;
} else {
String separator = body.substring(position, startPattern);
if ((!CharsetUtil.isWhitespace(separator) || position == 0) && !separator.isEmpty()) {
+ appendStringBuffer(fallback, charsetOverrides, out, sb, mimeCharset);
sb.append(separator);
}
- String mimeCharset = body.substring(startPattern + 2, charsetEnd);
- String encoding = body.substring(charsetEnd + 1, encodingEnd);
+ String mimeCurCharset = body.substring(startPattern + 2, charsetEnd);
+ String curEncoding = body.substring(charsetEnd + 1, encodingEnd);
String encodedText = body.substring(encodingEnd + 1, encodedTextEnd);
+ if (!mimeCharset.isEmpty() && !mimeCurCharset.equals(mimeCharset)){
+ appendStringBuffer(fallback, charsetOverrides, out, sb, mimeCharset);
+ }
+
+ if (!encoding.isEmpty() && !curEncoding.equals(encoding)){
+ appendStringBuffer(fallback, charsetOverrides, out, sb, mimeCharset);
+ }
+
+ mimeCharset=mimeCurCharset;
+ encoding=curEncoding;
+
if (encodedText.isEmpty()) {
position = encodedTextEnd + 2;
continue;
}
- String decoded;
- decoded = tryDecodeEncodedWord(mimeCharset, encoding, encodedText, monitor, fallback, charsetOverrides);
+ byte []decoded;
+
+ decoded = tryDecodeEncodedWord(mimeCurCharset, curEncoding, encodedText, monitor, fallback, charsetOverrides);
if (decoded != null) {
- if (!CharsetUtil.isWhitespace(decoded) && !decoded.isEmpty()) {
- sb.append(decoded);
+ if (0 < decoded.length) {
+ try {
+ out.write(decoded);
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
}
} else {
sb.append(body, startPattern, encodedTextEnd + 2);
@@ -257,11 +288,22 @@
position = encodedTextEnd + 2;
}
}
+ appendStringBuffer(fallback, charsetOverrides, out, sb,mimeCharset);
+
return sb.toString();
}
+    /**
+     * Flushes the bytes collected in {@code out} into {@code sb}.
+     * When {@code out} holds at least one byte, the bytes are converted to a
+     * {@code String} using the charset that {@code lookupCharset} resolves for
+     * {@code mimeCharset}, appended to {@code sb}, and {@code out} is reset.
+     * When {@code out} is empty nothing happens.
+     *
+     * @param fallback passed through to {@code lookupCharset}.
+     * @param charsetOverrides passed through to {@code lookupCharset}.
+     * @param out buffer of decoded-but-not-yet-converted bytes; emptied on flush.
+     * @param sb destination for the converted text.
+     * @param mimeCharset charset name the buffered bytes were declared with.
+     */
+    private static void appendStringBuffer(Charset fallback, Map<Charset, Charset> charsetOverrides, ByteArrayOutputStream out, StringBuilder sb, String mimeCharset) {
+        if (out.size() <= 0) {
+            // Nothing pending: leave sb and out untouched.
+            return;
+        }
+        final byte[] pendingBytes = out.toByteArray();
+        final Charset resolvedCharset = lookupCharset(mimeCharset, fallback, charsetOverrides);
+        final String pendingText = new String(pendingBytes, resolvedCharset);
+        sb.append(pendingText);
+        out.reset();
+    }
+
// return null on error
- private static String tryDecodeEncodedWord(
+ private static byte[] tryDecodeEncodedWord(
final String mimeCharset,
final String encoding,
final String encodedText,
@@ -283,9 +325,9 @@
try {
if (encoding.equalsIgnoreCase("Q")) {
- return DecoderUtil.decodeQ(encodedText, charset.name(), monitor);
+ return decodeByteAryQ(encodedText, monitor);
} else if (encoding.equalsIgnoreCase("B")) {
- return DecoderUtil.decodeB(encodedText, charset.name(), monitor);
+ return decodeByteAryB(encodedText, monitor);
} else {
monitor(monitor, mimeCharset, encoding, encodedText, "leaving word encoded",
"Warning: Unknown encoding in encoded word");
diff --git a/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java b/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java
index 6a76d99..e4f5c64 100644
--- a/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java
+++ b/core/src/test/java/org/apache/james/mime4j/codec/DecoderUtilTest.java
@@ -36,6 +36,13 @@
}
@Test
+    public void testDoubleLineBEncoding() {
+        // Two adjacent utf-8 'B' encoded words: the first payload ends with the
+        // leading two bytes of a three-byte UTF-8 character and the second payload
+        // starts with its final byte, so the words must be joined at byte level.
+        String encoded = "=?utf-8?B?W1NQQU1dIFJlOiBbbWNsb3VkLWJhcmlzdGFdIO2BtOudvOyasOuTnOuwlOumrOyKpO2DgCA37LCoIO2WieyCrC3rsJztkQ==?=\n" +
+                "=?utf-8?B?nOyekOujjCDtj6zrqacg6rO17Jyg?= ";
+        String expected = "[SPAM] Re: [mcloud-barista] 클라우드바리스타 7차 행사-발표자료 포멧 공유 ";
+        String actual = DecoderUtil.decodeEncodedWords(encoded, DecodeMonitor.STRICT);
+        Assert.assertEquals(expected, actual);
+    }
+
+ @Test
public void testDecodeQ() throws UnsupportedEncodingException {
String s = DecoderUtil.decodeQ("=e1_=e2=09=E3_=E4_", "ISO8859-1", DecodeMonitor.STRICT);
Assert.assertEquals("\u00e1 \u00e2\t\u00e3 \u00e4 ", s);