blob: f6b15fae4068be6e07aaccd44ff8db9a2efd8f84 [file] [log] [blame]
package org.apache.james.mime4j.dom;
import java.io.ByteArrayInputStream;
import java.io.UnsupportedEncodingException;
import org.apache.james.mime4j.message.BasicBodyFactory;
import org.apache.james.mime4j.message.DefaultMessageBuilder;
import org.junit.Assert;
import org.junit.Test;
/**
 * Checks that the charset handling of {@link BasicBodyFactory} can be
 * influenced with the boolean lenient flag passed to its constructor.
 *
 * @author wf
 */
public class MessageCharsetLenientTest {

    /**
     * Charset names the test expects to be rejected when the body factory is
     * not lenient.
     * <p>
     * This list is taken from parsing a sample of some 1/4 million e-mails,
     * so all of them showed up in real world e-mails.
     */
    private static final String[] INVALID_CHARSETS = {
        "%CHARSET",
        "'iso-8859-1'",
        "'utf-8'",
        "0",
        "238",
        "DEFAULT_CHARSET",
        "DIN_66003",
        "ISO 8859-1",
        "None",
        "Standard",
        "UTF-7",
        "X-CTEXT",
        "X-UNKNOWN",
        "\\iso-8859-1\"",
        "\\us-ascii\"",
        "ansi_x3.110-1983",
        "charset=us-ascii",
        "en",
        "iso-0-250-250-250-25-0-25",
        "iso-10646",
        "iso-1149-1",
        "iso-2191-1",
        "iso-3817-4",
        "iso-4736-8",
        "iso-5266-7",
        "iso-5666-3",
        "iso-5978-6",
        "iso-6558-5",
        "iso-7708-8",
        "iso-8085-5",
        "iso-8589-0",
        "iso-8814-4",
        "iso-8859-1 name=FAQ.htm",
        "iso-8859-16",
        "iso-8859-1?",
        "iso-8859-8-i",
        "iso-9284-4",
        "latin-iso8859-1",
        "unicode-1-1-utf-7",
        "unknown-8bit",
        "utf-7",
        "windows-1250 reply-type=original",
        "windows-1252 <!DOCTYPE HTML PUBLIC -//W3C//DTD HTML 4.01 Transitional//EN>",
        "x-user-defined",
        " {$RND_CHARSET$}"
    };

    /**
     * Parses one message per invalid charset, once with a lenient and once
     * with a non-lenient {@link BasicBodyFactory}.
     * <p>
     * The lenient pass must parse every message without an
     * {@link UnsupportedEncodingException}; the non-lenient pass must throw
     * one for every message. The final count check verifies that each charset
     * in the list produced exactly one exception overall.
     *
     * @throws Exception
     *             if building or parsing a message fails in a way this test
     *             does not expect
     */
    @Test
    public void testLenientCharsetHandling() throws Exception {
        // check with lenient charset handling on and off
        boolean[] lenientStates = { true, false };
        // a single builder is reused; only its body factory is swapped per state
        DefaultMessageBuilder builder = new DefaultMessageBuilder();
        // count how many exception hits we got
        int invalidCount = 0;
        // test in both states
        for (boolean lenient : lenientStates) {
            // set how lenient we are
            builder.setBodyFactory(new BasicBodyFactory(lenient));
            // check the list of invalid charsets
            for (String invalidCharset : INVALID_CHARSETS) {
                // Encode outside the try block: getBytes(String) declares the
                // same exception type we assert on below, so keeping it out
                // of the guarded region ensures only the parser is under test.
                byte[] rawMessage = buildMessage(invalidCharset).getBytes("UTF-8");
                try {
                    Message message = builder.parseMessage(
                            new ByteArrayInputStream(rawMessage));
                    // check some message attribute
                    Assert.assertEquals("text/plain", message.getMimeType());
                    // reaching this point without an exception is only
                    // acceptable in lenient mode
                    Assert.assertTrue("Charset:" + invalidCharset
                            + " should not be allowed when lenient is " + lenient, lenient);
                } catch (UnsupportedEncodingException ex) {
                    // an exception is only acceptable in non-lenient mode
                    Assert.assertFalse("Charset:" + invalidCharset
                            + " should not throw an exception when lenient is " + lenient, lenient);
                    invalidCount++;
                }
            }
        }
        // every charset must have been rejected exactly once (the non-lenient pass)
        Assert.assertEquals(INVALID_CHARSETS.length, invalidCount);
    }

    /**
     * Builds the raw text of a message whose Content-Type header declares the
     * given charset.
     *
     * @param charset
     *            the charset parameter value to put into the Content-Type header
     * @return the message text, using CRLF line ends
     */
    private static String buildMessage(String charset) {
        // NOTE(review): there is no empty line between the header fields and
        // the final text line, so that line may not be treated as the body by
        // the parser - confirm this is intended before changing it.
        return "Subject: my subject\r\n"
                + "Content-Type: text/plain; charset=" + charset + "\r\n"
                + "Strange charset isn't it?\r" + "\r\n";
    }
}