| /**************************************************************** |
| * Licensed to the Apache Software Foundation (ASF) under one * |
| * or more contributor license agreements. See the NOTICE file * |
| * distributed with this work for additional information * |
| * regarding copyright ownership. The ASF licenses this file * |
| * to you under the Apache License, Version 2.0 (the * |
| * "License"); you may not use this file except in compliance * |
| * with the License. You may obtain a copy of the License at * |
| * * |
| * http://www.apache.org/licenses/LICENSE-2.0 * |
| * * |
| * Unless required by applicable law or agreed to in writing, * |
| * software distributed under the License is distributed on an * |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * |
| * KIND, either express or implied. See the License for the * |
| * specific language governing permissions and limitations * |
| * under the License. * |
| ****************************************************************/ |
| |
| package org.apache.james.mailbox.opensearch.json; |
| |
| import static org.assertj.core.api.Assertions.assertThat; |
| import static org.mockito.ArgumentMatchers.any; |
| import static org.mockito.Mockito.mock; |
| import static org.mockito.Mockito.when; |
| |
| import java.io.IOException; |
| import java.time.ZoneId; |
| import java.util.Optional; |
| |
| import javax.mail.Flags; |
| |
| import org.apache.james.mailbox.MessageUid; |
| import org.apache.james.mailbox.ModSeq; |
| import org.apache.james.mailbox.opensearch.IndexAttachments; |
| import org.apache.james.mailbox.extractor.ParsedContent; |
| import org.apache.james.mailbox.extractor.TextExtractor; |
| import org.apache.james.mailbox.inmemory.InMemoryMessageId; |
| import org.apache.james.mailbox.model.AttachmentId; |
| import org.apache.james.mailbox.model.AttachmentMetadata; |
| import org.apache.james.mailbox.model.MessageAttachmentMetadata; |
| import org.apache.james.mailbox.model.MessageId; |
| import org.apache.james.mailbox.model.TestId; |
| import org.apache.james.mailbox.model.ThreadId; |
| import org.apache.james.mailbox.store.extractor.DefaultTextExtractor; |
| import org.apache.james.mailbox.store.mail.model.MailboxMessage; |
| import org.apache.james.mailbox.tika.TikaConfiguration; |
| import org.apache.james.mailbox.tika.TikaExtension; |
| import org.apache.james.mailbox.tika.TikaHttpClientImpl; |
| import org.apache.james.mailbox.tika.TikaTextExtractor; |
| import org.apache.james.metrics.tests.RecordingMetricFactory; |
| import org.junit.jupiter.api.BeforeEach; |
| import org.junit.jupiter.api.Test; |
| import org.junit.jupiter.api.extension.RegisterExtension; |
| |
| import com.google.common.collect.ImmutableList; |
| import com.google.common.collect.ImmutableMap; |
| |
| import reactor.core.publisher.Mono; |
| |
| class IndexableMessageTest { |
| static final MessageUid MESSAGE_UID = MessageUid.of(154); |
| |
| @RegisterExtension |
| static TikaExtension tika = new TikaExtension(); |
| |
| TikaTextExtractor textExtractor; |
| |
| @BeforeEach |
| void setUp() throws Exception { |
| textExtractor = new TikaTextExtractor(new RecordingMetricFactory(), new TikaHttpClientImpl(TikaConfiguration.builder() |
| .host(tika.getIp()) |
| .port(tika.getPort()) |
| .timeoutInMillis(tika.getTimeoutInMillis()) |
| .build())); |
| } |
| |
| @Test |
| void hasAttachmentsShouldReturnTrueWhenNonInlined() throws IOException { |
| //Given |
| MailboxMessage mailboxMessage = mock(MailboxMessage.class); |
| TestId mailboxId = TestId.of(1); |
| when(mailboxMessage.getMailboxId()) |
| .thenReturn(mailboxId); |
| when(mailboxMessage.getModSeq()) |
| .thenReturn(ModSeq.first()); |
| InMemoryMessageId messageId = InMemoryMessageId.of(42); |
| when(mailboxMessage.getMessageId()) |
| .thenReturn(messageId); |
| when(mailboxMessage.getFullContent()) |
| .thenReturn(ClassLoader.getSystemResourceAsStream("eml/mailWithHeaders.eml")); |
| when(mailboxMessage.createFlags()) |
| .thenReturn(new Flags()); |
| when(mailboxMessage.getUid()) |
| .thenReturn(MESSAGE_UID); |
| when(mailboxMessage.getAttachments()) |
| .thenReturn(ImmutableList.of(MessageAttachmentMetadata.builder() |
| .attachment(AttachmentMetadata.builder() |
| .messageId(messageId) |
| .attachmentId(AttachmentId.from("1")) |
| .type("text/plain") |
| .size(36) |
| .build()) |
| .isInline(false) |
| .build())); |
| |
| // When |
| IndexableMessage indexableMessage = IndexableMessage.builder() |
| .message(mailboxMessage) |
| .extractor(new DefaultTextExtractor()) |
| .zoneId(ZoneId.of("Europe/Paris")) |
| .indexAttachments(IndexAttachments.YES) |
| .build() |
| .block(); |
| |
| // Then |
| assertThat(indexableMessage.getHasAttachment()).isTrue(); |
| } |
| |
| @Test |
| void hasAttachmentsShouldReturnFalseWhenEmptyAttachments() throws IOException { |
| //Given |
| MailboxMessage mailboxMessage = mock(MailboxMessage.class); |
| TestId mailboxId = TestId.of(1); |
| when(mailboxMessage.getMailboxId()) |
| .thenReturn(mailboxId); |
| when(mailboxMessage.getMessageId()) |
| .thenReturn(InMemoryMessageId.of(42)); |
| when(mailboxMessage.getFullContent()) |
| .thenReturn(ClassLoader.getSystemResourceAsStream("eml/mailWithHeaders.eml")); |
| when(mailboxMessage.createFlags()) |
| .thenReturn(new Flags()); |
| when(mailboxMessage.getUid()) |
| .thenReturn(MESSAGE_UID); |
| when(mailboxMessage.getModSeq()) |
| .thenReturn(ModSeq.first()); |
| when(mailboxMessage.getAttachments()) |
| .thenReturn(ImmutableList.of()); |
| |
| // When |
| IndexableMessage indexableMessage = IndexableMessage.builder() |
| .message(mailboxMessage) |
| .extractor(new DefaultTextExtractor()) |
| .zoneId(ZoneId.of("Europe/Paris")) |
| .indexAttachments(IndexAttachments.NO) |
| .build() |
| .block(); |
| |
| // Then |
| assertThat(indexableMessage.getHasAttachment()).isFalse(); |
| } |
| |
| @Test |
| void attachmentsShouldNotBeenIndexedWhenAsked() throws Exception { |
| //Given |
| MailboxMessage mailboxMessage = mock(MailboxMessage.class); |
| TestId mailboxId = TestId.of(1); |
| when(mailboxMessage.getMailboxId()) |
| .thenReturn(mailboxId); |
| when(mailboxMessage.getModSeq()) |
| .thenReturn(ModSeq.first()); |
| when(mailboxMessage.getMessageId()) |
| .thenReturn(InMemoryMessageId.of(42)); |
| when(mailboxMessage.getFullContent()) |
| .thenReturn(ClassLoader.getSystemResourceAsStream("eml/mailWithHeaders.eml")); |
| when(mailboxMessage.createFlags()) |
| .thenReturn(new Flags()); |
| when(mailboxMessage.getUid()) |
| .thenReturn(MESSAGE_UID); |
| |
| // When |
| IndexableMessage indexableMessage = IndexableMessage.builder() |
| .message(mailboxMessage) |
| .extractor(new DefaultTextExtractor()) |
| .zoneId(ZoneId.of("Europe/Paris")) |
| .indexAttachments(IndexAttachments.NO) |
| .build() |
| .block(); |
| |
| // Then |
| assertThat(indexableMessage.getAttachments()).isEmpty(); |
| } |
| |
| @Test |
| void attachmentsShouldBeenIndexedWhenAsked() throws Exception { |
| //Given |
| MailboxMessage mailboxMessage = mock(MailboxMessage.class); |
| TestId mailboxId = TestId.of(1); |
| when(mailboxMessage.getMailboxId()) |
| .thenReturn(mailboxId); |
| when(mailboxMessage.getModSeq()) |
| .thenReturn(ModSeq.first()); |
| when(mailboxMessage.getMessageId()) |
| .thenReturn(InMemoryMessageId.of(42)); |
| when(mailboxMessage.getFullContent()) |
| .thenReturn(ClassLoader.getSystemResourceAsStream("eml/emailWith3Attachments.eml")); |
| when(mailboxMessage.createFlags()) |
| .thenReturn(new Flags()); |
| when(mailboxMessage.getUid()) |
| .thenReturn(MESSAGE_UID); |
| |
| // When |
| IndexableMessage indexableMessage = IndexableMessage.builder() |
| .message(mailboxMessage) |
| .extractor(new DefaultTextExtractor()) |
| .zoneId(ZoneId.of("Europe/Paris")) |
| .indexAttachments(IndexAttachments.YES) |
| .build() |
| .block(); |
| |
| // Then |
| assertThat(indexableMessage.getAttachments()).isNotEmpty(); |
| } |
| |
| @SuppressWarnings("checkstyle:LocalVariableName") |
| @Test |
| void otherAttachmentsShouldBeenIndexedWhenOneOfThemCannotBeParsed() throws Exception { |
| //Given |
| MailboxMessage mailboxMessage = mock(MailboxMessage.class); |
| TestId mailboxId = TestId.of(1); |
| when(mailboxMessage.getMailboxId()) |
| .thenReturn(mailboxId); |
| when(mailboxMessage.getModSeq()) |
| .thenReturn(ModSeq.first()); |
| when(mailboxMessage.getMessageId()) |
| .thenReturn(InMemoryMessageId.of(42)); |
| when(mailboxMessage.getFullContent()) |
| .thenReturn(ClassLoader.getSystemResourceAsStream("eml/emailWith3Attachments.eml")); |
| when(mailboxMessage.createFlags()) |
| .thenReturn(new Flags()); |
| when(mailboxMessage.getUid()) |
| .thenReturn(MESSAGE_UID); |
| |
| TextExtractor textExtractor = mock(TextExtractor.class); |
| when(textExtractor.applicable(any())).thenReturn(true); |
| when(textExtractor.extractContentReactive(any(), any())) |
| .thenReturn(Mono.just(new ParsedContent(Optional.of("first attachment content"), ImmutableMap.of()))) |
| .thenReturn(Mono.error(new RuntimeException("second cannot be parsed"))) |
| .thenReturn(Mono.just(new ParsedContent(Optional.of("third attachment content"), ImmutableMap.of()))); |
| |
| // When |
| IndexableMessage indexableMessage = IndexableMessage.builder() |
| .message(mailboxMessage) |
| .extractor(textExtractor) |
| .zoneId(ZoneId.of("Europe/Paris")) |
| .indexAttachments(IndexAttachments.YES) |
| .build() |
| .block(); |
| |
| // Then |
| String NO_TEXTUAL_BODY = "The textual body is not present"; |
| assertThat(indexableMessage.getAttachments()) |
| .extracting(input -> input.getTextualBody().orElse(NO_TEXTUAL_BODY)) |
| .contains("first attachment content", NO_TEXTUAL_BODY, "third attachment content"); |
| } |
| |
| @Test |
| void shouldHandleCorrectlyMessageIdHavingSerializeMethodThatReturnNull() throws Exception { |
| MessageId invalidMessageIdThatReturnNull = mock(MessageId.class); |
| when(invalidMessageIdThatReturnNull.serialize()) |
| .thenReturn(null); |
| |
| // When |
| MailboxMessage mailboxMessage = mock(MailboxMessage.class); |
| TestId mailboxId = TestId.of(1); |
| when(mailboxMessage.getMailboxId()) |
| .thenReturn(mailboxId); |
| when(mailboxMessage.getModSeq()) |
| .thenReturn(ModSeq.first()); |
| when(mailboxMessage.getMessageId()) |
| .thenReturn(invalidMessageIdThatReturnNull); |
| when(mailboxMessage.getFullContent()) |
| .thenReturn(ClassLoader.getSystemResourceAsStream("eml/bodyMakeTikaToFail.eml")); |
| when(mailboxMessage.createFlags()) |
| .thenReturn(new Flags()); |
| when(mailboxMessage.getUid()) |
| .thenReturn(MESSAGE_UID); |
| |
| IndexableMessage indexableMessage = IndexableMessage.builder() |
| .message(mailboxMessage) |
| .extractor(textExtractor) |
| .zoneId(ZoneId.of("Europe/Paris")) |
| .indexAttachments(IndexAttachments.YES) |
| .build() |
| .block(); |
| |
| // Then |
| assertThat(indexableMessage.getMessageId()).isNull(); |
| } |
| |
| @Test |
| void shouldHandleCorrectlyThreadIdHavingSerializeMethodThatReturnNull() throws Exception { |
| ThreadId invalidThreadIdThatReturnNull = mock(ThreadId.class); |
| when(invalidThreadIdThatReturnNull.serialize()) |
| .thenReturn(null); |
| |
| // When |
| MailboxMessage mailboxMessage = mock(MailboxMessage.class); |
| TestId mailboxId = TestId.of(1); |
| when(mailboxMessage.getMailboxId()) |
| .thenReturn(mailboxId); |
| when(mailboxMessage.getModSeq()) |
| .thenReturn(ModSeq.first()); |
| when(mailboxMessage.getThreadId()) |
| .thenReturn(invalidThreadIdThatReturnNull); |
| when(mailboxMessage.getFullContent()) |
| .thenReturn(ClassLoader.getSystemResourceAsStream("eml/bodyMakeTikaToFail.eml")); |
| when(mailboxMessage.createFlags()) |
| .thenReturn(new Flags()); |
| when(mailboxMessage.getUid()) |
| .thenReturn(MESSAGE_UID); |
| |
| IndexableMessage indexableMessage = IndexableMessage.builder() |
| .message(mailboxMessage) |
| .extractor(textExtractor) |
| .zoneId(ZoneId.of("Europe/Paris")) |
| .indexAttachments(IndexAttachments.YES) |
| .build() |
| .block(); |
| |
| // Then |
| assertThat(indexableMessage.getThreadId()).isNull(); |
| } |
| |
| @Test |
| void shouldHandleCorrectlyNullMessageId() throws Exception { |
| |
| // When |
| MailboxMessage mailboxMessage = mock(MailboxMessage.class); |
| TestId mailboxId = TestId.of(1); |
| when(mailboxMessage.getMailboxId()) |
| .thenReturn(mailboxId); |
| when(mailboxMessage.getMessageId()) |
| .thenReturn(null); |
| when(mailboxMessage.getModSeq()) |
| .thenReturn(ModSeq.first()); |
| when(mailboxMessage.getFullContent()) |
| .thenReturn(ClassLoader.getSystemResourceAsStream("eml/bodyMakeTikaToFail.eml")); |
| when(mailboxMessage.createFlags()) |
| .thenReturn(new Flags()); |
| when(mailboxMessage.getUid()) |
| .thenReturn(MESSAGE_UID); |
| |
| IndexableMessage indexableMessage = IndexableMessage.builder() |
| .message(mailboxMessage) |
| .extractor(textExtractor) |
| .zoneId(ZoneId.of("Europe/Paris")) |
| .indexAttachments(IndexAttachments.YES) |
| .build() |
| .block(); |
| |
| // Then |
| assertThat(indexableMessage.getMessageId()).isNull(); |
| } |
| |
| @Test |
| void shouldHandleCorrectlyNullThreadId() throws Exception { |
| |
| // When |
| MailboxMessage mailboxMessage = mock(MailboxMessage.class); |
| TestId mailboxId = TestId.of(1); |
| when(mailboxMessage.getMailboxId()) |
| .thenReturn(mailboxId); |
| when(mailboxMessage.getMessageId()) |
| .thenReturn(null); |
| when(mailboxMessage.getThreadId()) |
| .thenReturn(null); |
| when(mailboxMessage.getModSeq()) |
| .thenReturn(ModSeq.first()); |
| when(mailboxMessage.getFullContent()) |
| .thenReturn(ClassLoader.getSystemResourceAsStream("eml/bodyMakeTikaToFail.eml")); |
| when(mailboxMessage.createFlags()) |
| .thenReturn(new Flags()); |
| when(mailboxMessage.getUid()) |
| .thenReturn(MESSAGE_UID); |
| |
| IndexableMessage indexableMessage = IndexableMessage.builder() |
| .message(mailboxMessage) |
| .extractor(textExtractor) |
| .zoneId(ZoneId.of("Europe/Paris")) |
| .indexAttachments(IndexAttachments.YES) |
| .build() |
| .block(); |
| |
| // Then |
| assertThat(indexableMessage.getThreadId()).isNull(); |
| } |
| |
| @Test |
| void shouldSerializeThreadIdCorrectly() throws Exception { |
| //Given |
| MailboxMessage mailboxMessage = mock(MailboxMessage.class); |
| TestId mailboxId = TestId.of(1); |
| when(mailboxMessage.getMailboxId()) |
| .thenReturn(mailboxId); |
| when(mailboxMessage.getModSeq()) |
| .thenReturn(ModSeq.first()); |
| when(mailboxMessage.getMessageId()) |
| .thenReturn(InMemoryMessageId.of(42)); |
| when(mailboxMessage.getThreadId()) |
| .thenReturn(ThreadId.fromBaseMessageId(InMemoryMessageId.of(42))); |
| when(mailboxMessage.getFullContent()) |
| .thenReturn(ClassLoader.getSystemResourceAsStream("eml/emailWith3Attachments.eml")); |
| when(mailboxMessage.createFlags()) |
| .thenReturn(new Flags()); |
| when(mailboxMessage.getUid()) |
| .thenReturn(MESSAGE_UID); |
| |
| |
| // When |
| IndexableMessage indexableMessage = IndexableMessage.builder() |
| .message(mailboxMessage) |
| .extractor(new DefaultTextExtractor()) |
| .zoneId(ZoneId.of("Europe/Paris")) |
| .indexAttachments(IndexAttachments.NO) |
| .build() |
| .block(); |
| |
| // Then |
| assertThat(indexableMessage.getThreadId()).isEqualTo("42"); |
| } |
| } |