MIME4J-316 Parts missing in case of a specific combination of boundaries
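The problem can be reproduced by parsing a very specific email structure, where
an inner boundary consists of an outer boundary followed by a "-" character and more text
(for example, outer "--boundary.1652331600846930886" and inner "--boundary.1652331600846930886-1").
In the example message added by this change (boundary-name-clash.msg), the attached PDF file was ignored by the parser before the fix.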
Merge pull request #2 from bluemind-net/bugfix/BM-18542 (#71)
BM-18542 Fix: check for -- boundary ending
Thanks @tommix000 for the fix.
diff --git a/core/src/main/java/org/apache/james/mime4j/io/MimeBoundaryInputStream.java b/core/src/main/java/org/apache/james/mime4j/io/MimeBoundaryInputStream.java
index 6eaa1ed..88cdfc0 100644
--- a/core/src/main/java/org/apache/james/mime4j/io/MimeBoundaryInputStream.java
+++ b/core/src/main/java/org/apache/james/mime4j/io/MimeBoundaryInputStream.java
@@ -244,11 +244,14 @@
// Make sure the boundary is terminated with EOS
break;
} else {
- // or with a whitespace or '-' char
+ // or with a whitespace or '--'
char ch = (char)(buffer.byteAt(pos));
- if (CharsetUtil.isWhitespace(ch) || ch == '-') {
+ if (CharsetUtil.isWhitespace(ch)) {
break;
}
+ if (ch == '-' && remaining > 1 && (char)(buffer.byteAt(pos+1)) == '-') {
+ break;
+ }
}
}
off = i + boundary.length;
diff --git a/core/src/test/resources/testmsgs/boundary-name-clash.msg b/core/src/test/resources/testmsgs/boundary-name-clash.msg
new file mode 100644
index 0000000..47580f7
--- /dev/null
+++ b/core/src/test/resources/testmsgs/boundary-name-clash.msg
@@ -0,0 +1,36 @@
+Subject: test
+MIME-Version: 1.0
+Content-Type: multipart/mixed;
+ boundary="--boundary.1652331600846930886"
+
+----boundary.1652331600846930886
+Content-Type: multipart/alternative;
+ boundary="--boundary.1652331600846930886-1"
+
+----boundary.1652331600846930886-1
+Content-Type: text/plain; charset=utf-8
+
+Please see attachment for report Daily_Stats-2022-05-12-0700
+----boundary.1652331600846930886-1
+Content-Type: text/html; charset=utf-8
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html lang="en">
+ <head>
+ <meta http-equiv="content-type" content="text/html; charset=UTF-8">
+ </head>
+ <body>
+ <p>Please see attachment for report Daily_Stats-2022-05-12-0700.</p>
+ </body>
+</html>
+----boundary.1652331600846930886-1--
+----boundary.1652331600846930886
+Content-Type: application/pdf;
+ name="Daily_Stats-2022-05-12-0700.pdf"
+Content-Transfer-Encoding: base64
+Content-Disposition: Attachment;
+ filename="Daily_Stats-2022-05-12-0700.pdf"
+
+JVBERi0xLj4Kc3RhcnR4cmVmCjUzNjEwCiUlRU9GCg==
+
+----boundary.1652331600846930886--
diff --git a/core/src/test/resources/testmsgs/boundary-name-clash.out b/core/src/test/resources/testmsgs/boundary-name-clash.out
new file mode 100644
index 0000000..29a7183
--- /dev/null
+++ b/core/src/test/resources/testmsgs/boundary-name-clash.out
@@ -0,0 +1,37 @@
+Subject: test
+MIME-Version: 1.0
+Content-Type: multipart/mixed;
+ boundary="--boundary.1652331600846930886"
+
+----boundary.1652331600846930886
+Content-Type: multipart/alternative;
+ boundary="--boundary.1652331600846930886-1"
+
+----boundary.1652331600846930886-1
+Content-Type: text/plain; charset=utf-8
+
+Please see attachment for report Daily_Stats-2022-05-12-0700
+----boundary.1652331600846930886-1
+Content-Type: text/html; charset=utf-8
+
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html lang="en">
+ <head>
+ <meta http-equiv="content-type" content="text/html; charset=UTF-8">
+ </head>
+ <body>
+ <p>Please see attachment for report Daily_Stats-2022-05-12-0700.</p>
+ </body>
+</html>
+----boundary.1652331600846930886-1--
+
+----boundary.1652331600846930886
+Content-Type: application/pdf;
+ name="Daily_Stats-2022-05-12-0700.pdf"
+Content-Transfer-Encoding: base64
+Content-Disposition: Attachment;
+ filename="Daily_Stats-2022-05-12-0700.pdf"
+
+JVBERi0xLj4Kc3RhcnR4cmVmCjUzNjEwCiUlRU9GCg==
+
+----boundary.1652331600846930886--
diff --git a/core/src/test/resources/testmsgs/boundary-name-clash.xml b/core/src/test/resources/testmsgs/boundary-name-clash.xml
new file mode 100644
index 0000000..0a5b805
--- /dev/null
+++ b/core/src/test/resources/testmsgs/boundary-name-clash.xml
@@ -0,0 +1,61 @@
+<message>
+<header>
+<field>
+Subject: test</field>
+<field>
+MIME-Version: 1.0</field>
+<field>
+Content-Type: multipart/mixed;
+ boundary="--boundary.1652331600846930886"</field>
+</header>
+<multipart>
+<body-part>
+<header>
+<field>
+Content-Type: multipart/alternative;
+ boundary="--boundary.1652331600846930886-1"</field>
+</header>
+<multipart>
+<body-part>
+<header>
+<field>
+Content-Type: text/plain; charset=utf-8</field>
+</header>
+<body>
+Please see attachment for report Daily_Stats-2022-05-12-0700</body>
+</body-part>
+<body-part>
+<header>
+<field>
+Content-Type: text/html; charset=utf-8</field>
+</header>
+<body>
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html lang="en">
+ <head>
+ <meta http-equiv="content-type" content="text/html; charset=UTF-8">
+ </head>
+ <body>
+ <p>Please see attachment for report Daily_Stats-2022-05-12-0700.</p>
+ </body>
+</html></body>
+</body-part>
+</multipart>
+</body-part>
+<body-part>
+<header>
+<field>
+Content-Type: application/pdf;
+ name="Daily_Stats-2022-05-12-0700.pdf"</field>
+<field>
+Content-Transfer-Encoding: base64</field>
+<field>
+Content-Disposition: Attachment;
+ filename="Daily_Stats-2022-05-12-0700.pdf"</field>
+</header>
+<body>
+JVBERi0xLj4Kc3RhcnR4cmVmCjUzNjEwCiUlRU9GCg==
+</body>
+</body-part>
+</multipart>
+</message>
diff --git a/core/src/test/resources/testmsgs/boundary-name-clash_decoded.xml b/core/src/test/resources/testmsgs/boundary-name-clash_decoded.xml
new file mode 100644
index 0000000..631493e
--- /dev/null
+++ b/core/src/test/resources/testmsgs/boundary-name-clash_decoded.xml
@@ -0,0 +1,49 @@
+<message>
+<header>
+<field>
+Subject: test</field>
+<field>
+MIME-Version: 1.0</field>
+<field>
+Content-Type: multipart/mixed;
+ boundary="--boundary.1652331600846930886"</field>
+</header>
+<multipart>
+<body-part>
+<header>
+<field>
+Content-Type: multipart/alternative;
+ boundary="--boundary.1652331600846930886-1"</field>
+</header>
+<multipart>
+<body-part>
+<header>
+<field>
+Content-Type: text/plain; charset=utf-8</field>
+</header>
+<text-body name="boundary-name-clash_decoded_1_1_1.txt"/>
+</body-part>
+<body-part>
+<header>
+<field>
+Content-Type: text/html; charset=utf-8</field>
+</header>
+<text-body name="boundary-name-clash_decoded_1_1_2.txt"/>
+</body-part>
+</multipart>
+</body-part>
+<body-part>
+<header>
+<field>
+Content-Type: application/pdf;
+ name="Daily_Stats-2022-05-12-0700.pdf"</field>
+<field>
+Content-Transfer-Encoding: base64</field>
+<field>
+Content-Disposition: Attachment;
+ filename="Daily_Stats-2022-05-12-0700.pdf"</field>
+</header>
+<binary-body name="boundary-name-clash_decoded_1_2.bin"/>
+</body-part>
+</multipart>
+</message>
diff --git a/core/src/test/resources/testmsgs/boundary-name-clash_decoded_1_1_1.txt b/core/src/test/resources/testmsgs/boundary-name-clash_decoded_1_1_1.txt
new file mode 100644
index 0000000..40c2f0e
--- /dev/null
+++ b/core/src/test/resources/testmsgs/boundary-name-clash_decoded_1_1_1.txt
@@ -0,0 +1 @@
+Please see attachment for report Daily_Stats-2022-05-12-0700
\ No newline at end of file
diff --git a/core/src/test/resources/testmsgs/boundary-name-clash_decoded_1_1_2.txt b/core/src/test/resources/testmsgs/boundary-name-clash_decoded_1_1_2.txt
new file mode 100644
index 0000000..289f5ae
--- /dev/null
+++ b/core/src/test/resources/testmsgs/boundary-name-clash_decoded_1_1_2.txt
@@ -0,0 +1,9 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
+<html lang="en">
+ <head>
+ <meta http-equiv="content-type" content="text/html; charset=UTF-8">
+ </head>
+ <body>
+ <p>Please see attachment for report Daily_Stats-2022-05-12-0700.</p>
+ </body>
+</html>
\ No newline at end of file
diff --git a/core/src/test/resources/testmsgs/boundary-name-clash_decoded_1_2.bin b/core/src/test/resources/testmsgs/boundary-name-clash_decoded_1_2.bin
new file mode 100644
index 0000000..9200839
--- /dev/null
+++ b/core/src/test/resources/testmsgs/boundary-name-clash_decoded_1_2.bin
@@ -0,0 +1,4 @@
+%PDF-1.>
+startxref
+53610
+%%EOF