blob: f6b15fae4068be6e07aaccd44ff8db9a2efd8f84 [file] [log] [blame]
package org.apache.james.mime4j.dom;
import org.apache.james.mime4j.message.BasicBodyFactory;
import org.apache.james.mime4j.message.DefaultMessageBuilder;
import org.junit.Assert;
import org.junit.Test;
/**
 * Checks that the charset handling of BasicBodyFactory can be influenced
 * with the boolean lenient flag.
 *
 * @author wf
 */
public class MessageCharsetLenientTest {
/**
 * Sets up messages with an invalid charset and parses them.
 *
 * @throws Exception
 */
public void testLenientCharsetHandling() throws Exception {
// this list of invalid charsets is taken from parsing a sample of some 1/4 million e-mails,
// so all of them showed up in real-world e-mails
String invalidCharsets[] = {
"ISO 8859-1",
"iso-8859-1 name=FAQ.htm",
"windows-1250 reply-type=original",
"windows-1252 <!DOCTYPE HTML PUBLIC -//W3C//DTD HTML 4.01 Transitional//EN>",
"x-user-defined", " {$RND_CHARSET$}" };
// check with lenient charset handling on and off
boolean[] lenientstates = { true, false };
// create the message builder
DefaultMessageBuilder builder = new DefaultMessageBuilder();
// count how many Exception hits we got
int invalidCount=0;
// test in both states
for (boolean lenient : lenientstates) {
// set how lenient we are
builder.setBodyFactory(new BasicBodyFactory(lenient));
// check the list of invalid Charsets
for (String invalidCharset : invalidCharsets) {
// create a message with the charset
String charsetContent = "Subject: my subject\r\n"
+ "Content-Type: text/plain; charset=" + invalidCharset + "\r\n"
+ "Strange charset isn't it?\r" + "\r\n";
// try parsing it
try {
Message message = builder.parseMessage(new ByteArrayInputStream(
// check some message attribute
Assert.assertEquals("text/plain", message.getMimeType());
// if we get here we had a lenient mode - in non lenient an exception would have been thrown
Assert.assertTrue("Charset:"+invalidCharset+" should not be allowed when lenient is "+lenient,lenient);
} catch (UnsupportedEncodingException ex) {
Assert.assertFalse("Charset:"+invalidCharset+" should not throw an exception when lenient is "+lenient,lenient);
} // for