TIKA-4249 -- allow utf8 bom at start of rfc822 detection
diff --git a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
index 09bbd96..94318a0 100644
--- a/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
+++ b/tika-core/src/main/resources/org/apache/tika/mime/tika-mimetypes.xml
@@ -7035,6 +7035,7 @@
     <magic priority="45">
       <!-- be a bit more flexible, but require one from each of these -->
       <match minShouldMatch="2">
+        <!-- first group: one known header at byte 0, or immediately after a UTF-8 BOM -->
         <match minShouldMatch="1">
           <match value="Content-ID:" type="stringignorecase" offset="0"/>
           <match value="Content-Location:" type="stringignorecase" offset="0"/>
@@ -7055,6 +7056,27 @@
           <match value="User-Agent:" type="string" offset="0"/>
           <match value="X-Mailer:" type="string" offset="0"/>
           <match value="X-Originating-IP:" type="stringignorecase" offset="0"/>
+          <match value="0xefbbbf" type="string" offset="0"> <!-- UTF-8 BOM (EF BB BF); nested matches repeat the offset-0 header list shifted to offset 3, just past the 3-byte BOM -->
+            <match value="Content-ID:" type="stringignorecase" offset="3"/>
+            <match value="Content-Location:" type="stringignorecase" offset="3"/>
+            <match value="Content-Transfer-Encoding:" type="stringignorecase" offset="3"/>
+            <match value="Content-Type:" type="stringignorecase" offset="3"/>
+            <match value="Date:" type="stringignorecase" offset="3"/>
+            <match value="Delivered-To:" type="string" offset="3"/>
+            <match value="From:" type="stringignorecase" offset="3"/>
+            <match value="Message-ID:" type="stringignorecase" offset="3"/>
+            <match value="MIME-Version:" type="stringignorecase" offset="3"/>
+            <match value="Received:" type="stringignorecase" offset="3"/>
+            <match value="Relay-Version:" type="stringignorecase" offset="3"/>
+            <match value="Return-Path:" type="stringignorecase" offset="3"/>
+            <match value="Sent:" type="string" offset="3"/>
+            <match value="Status:" type="string" offset="3"/>
+            <match value="Subject:" type="string" offset="3"/>
+            <match value="To:" type="string" offset="3"/>
+            <match value="User-Agent:" type="string" offset="3"/>
+            <match value="X-Mailer:" type="string" offset="3"/>
+            <match value="X-Originating-IP:" type="stringignorecase" offset="3"/>
+          </match>
         </match>
         <match minShouldMatch="1">
           <match value="\nContent-ID:" type="stringignorecase" offset="0:1024"/>
diff --git a/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java b/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
index 434ff6c..84820ac 100644
--- a/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
+++ b/tika-core/src/test/java/org/apache/tika/mime/MimeDetectionTest.java
@@ -28,6 +28,8 @@
 import java.io.InputStream;
 import java.net.URL;
 
+import org.apache.commons.io.ByteOrderMark;
+import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream;
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
 
@@ -110,6 +112,27 @@
     }
 
     @Test
+    public void testRFC822WithBOM() throws Exception {
+        // TIKA-4249: rfc822 headers must be detected with and without a leading UTF-8 BOM.
+        byte[] header = ("From: blah <blah@blah.com>\r\n" +
+                "Received: Friday, January 24, 2020 3:24 PM\r\n" + "To: someone@somewhere.com\r\n" +
+                "Cc: someone-else@other.com\r\n" + "Subject: Received\r\n").getBytes(UTF_8);
+        MediaType rfc822 = MediaType.parse("message/rfc822");
+        assertEquals(rfc822, MIME_TYPES.detect(
+                UnsynchronizedByteArrayInputStream.builder().setByteArray(header).get(),
+                new Metadata()));
+
+        // Prepend the BOM; the header's offset comes from the BOM length, not a literal 3.
+        byte[] bom = ByteOrderMark.UTF_8.getBytes();
+        byte[] bytes = new byte[bom.length + header.length];
+        System.arraycopy(bom, 0, bytes, 0, bom.length);
+        System.arraycopy(header, 0, bytes, bom.length, header.length);
+        assertEquals(rfc822, MIME_TYPES.detect(
+                UnsynchronizedByteArrayInputStream.builder().setByteArray(bytes).get(),
+                new Metadata()));
+    }
+
+    @Test
     public void testSuperTypes() {
         assertTrue(REGISTRY.isSpecializationOf(MediaType.parse("text/something; charset=UTF-8"),
                 MediaType.parse("text/something")));