blob: 2b26a66af6779bcd8b28ce8f76346e6fb2a9af6e [file] [log] [blame]
/****************************************************************
* Licensed to the Apache Software Foundation (ASF) under one *
* or more contributor license agreements. See the NOTICE file *
* distributed with this work for additional information *
* regarding copyright ownership. The ASF licenses this file *
* to you under the Apache License, Version 2.0 (the *
* "License"); you may not use this file except in compliance *
* with the License. You may obtain a copy of the License at *
* *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, *
* software distributed under the License is distributed on an *
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
* KIND, either express or implied. See the License for the *
* specific language governing permissions and limitations *
* under the License. *
****************************************************************/
package org.apache.james.mailbox.elasticsearch.json;
import java.time.ZonedDateTime;
import java.util.HashSet;
import java.util.Locale;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.james.mailbox.store.search.SearchUtil;
import org.apache.james.mime4j.dom.address.Address;
import org.apache.james.mime4j.dom.address.Group;
import org.apache.james.mime4j.dom.address.Mailbox;
import org.apache.james.mime4j.field.address.LenientAddressParser;
import org.apache.james.mime4j.stream.Field;
import org.apache.james.mime4j.util.MimeUtil;
import org.apache.james.util.date.ImapDateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Multimap;
public class HeaderCollection {
public static class Builder {
// Some sent e-mail have this form : Wed, 3 Jun 2015 09:05:46 +0000 (UTC)
// Java 8 Time library RFC_1123_DATE_TIME corresponds to Wed, 3 Jun 2015 09:05:46 +0000 only
// This REGEXP is here to match ( in order to remove ) the possible invalid end of a header date
// Example of matching patterns :
// (UTC)
// (CEST)
private static final Pattern DATE_SANITIZING_PATTERN = Pattern.compile(" *\\(.*\\) *");
private final Set<EMailer> toAddressSet;
private final Set<EMailer> fromAddressSet;
private final Set<EMailer> ccAddressSet;
private final Set<EMailer> bccAddressSet;
private final Set<EMailer> replyToAddressSet;
private final Set<String> subjectSet;
private final Multimap<String, String> headers;
private Optional<ZonedDateTime> sentDate;
private Builder() {
toAddressSet = new HashSet<>();
fromAddressSet = new HashSet<>();
ccAddressSet = new HashSet<>();
bccAddressSet = new HashSet<>();
replyToAddressSet = new HashSet<>();
subjectSet = new HashSet<>();
headers = ArrayListMultimap.create();
sentDate = Optional.empty();
}
public Builder add(Field field) {
Preconditions.checkNotNull(field);
String headerName = field.getName().toLowerCase(Locale.US);
String sanitizedValue = MimeUtil.unscrambleHeaderValue(field.getBody());
headers.put(headerName, sanitizedValue);
handleSpecificHeader(headerName, sanitizedValue);
return this;
}
public HeaderCollection build() {
return new HeaderCollection(
ImmutableSet.copyOf(toAddressSet),
ImmutableSet.copyOf(fromAddressSet),
ImmutableSet.copyOf(ccAddressSet),
ImmutableSet.copyOf(bccAddressSet),
ImmutableSet.copyOf(replyToAddressSet),
ImmutableSet.copyOf(subjectSet),
ImmutableMultimap.copyOf(headers),
sentDate);
}
private void handleSpecificHeader(String headerName, String headerValue) {
switch (headerName) {
case TO:
case FROM:
case CC:
case BCC:
case REPLY_TO:
manageAddressField(headerName, headerValue);
break;
case SUBJECT:
subjectSet.add(headerValue);
break;
case DATE:
sentDate = toISODate(headerValue);
break;
}
}
private void manageAddressField(String headerName, String headerValue) {
LenientAddressParser.DEFAULT
.parseAddressList(headerValue)
.stream()
.flatMap(this::convertAddressToMailboxStream)
.map((mailbox) -> new EMailer(SearchUtil.getDisplayAddress(mailbox) , mailbox.getAddress()))
.collect(Collectors.toCollection(() -> getAddressSet(headerName)));
}
private Stream<Mailbox> convertAddressToMailboxStream(Address address) {
if (address instanceof Mailbox) {
return Stream.of((Mailbox) address);
} else if (address instanceof Group) {
return ((Group) address).getMailboxes().stream();
}
return Stream.empty();
}
private Set<EMailer> getAddressSet(String headerName) {
switch (headerName) {
case TO:
return toAddressSet;
case FROM:
return fromAddressSet;
case CC:
return ccAddressSet;
case BCC:
return bccAddressSet;
case REPLY_TO:
return replyToAddressSet;
}
throw new RuntimeException(headerName + " is not a address header name");
}
private Optional<ZonedDateTime> toISODate(String value) {
try {
return Optional.of(ZonedDateTime.parse(
sanitizeDateStringHeaderValue(value),
ImapDateTimeFormatter.rfc5322()));
} catch (Exception e) {
LOGGER.info("Can not parse receive date " + value);
return Optional.empty();
}
}
@VisibleForTesting String sanitizeDateStringHeaderValue(String value) {
// Some sent e-mail have this form : Wed, 3 Jun 2015 09:05:46 +0000 (UTC)
// Java 8 Time library RFC_1123_DATE_TIME corresponds to Wed, 3 Jun 2015 09:05:46 +0000 only
// This method is here to convert the first date into something parsable by RFC_1123_DATE_TIME DateTimeFormatter
Matcher sanitizerMatcher = DATE_SANITIZING_PATTERN.matcher(value);
if (sanitizerMatcher.find()) {
return value.substring(0 , sanitizerMatcher.start());
}
return value;
}
}
public static final String TO = "to";
public static final String FROM = "from";
public static final String CC = "cc";
public static final String BCC = "bcc";
public static final String REPLY_TO = "reply-to";
public static final String SUBJECT = "subject";
public static final String DATE = "date";
private static final Logger LOGGER = LoggerFactory.getLogger(HeaderCollection.class);
public static Builder builder() {
return new Builder();
}
private final ImmutableSet<EMailer> toAddressSet;
private final ImmutableSet<EMailer> fromAddressSet;
private final ImmutableSet<EMailer> ccAddressSet;
private final ImmutableSet<EMailer> bccAddressSet;
private final ImmutableSet<EMailer> replyToAddressSet;
private final ImmutableSet<String> subjectSet;
private final ImmutableMultimap<String, String> headers;
private final Optional<ZonedDateTime> sentDate;
private HeaderCollection(ImmutableSet<EMailer> toAddressSet, ImmutableSet<EMailer> fromAddressSet,
ImmutableSet<EMailer> ccAddressSet, ImmutableSet<EMailer> bccAddressSet, ImmutableSet<EMailer> replyToAddressSet, ImmutableSet<String> subjectSet,
ImmutableMultimap<String, String> headers, Optional<ZonedDateTime> sentDate) {
this.toAddressSet = toAddressSet;
this.fromAddressSet = fromAddressSet;
this.ccAddressSet = ccAddressSet;
this.bccAddressSet = bccAddressSet;
this.replyToAddressSet = replyToAddressSet;
this.subjectSet = subjectSet;
this.headers = headers;
this.sentDate = sentDate;
}
public Set<EMailer> getToAddressSet() {
return toAddressSet;
}
public Set<EMailer> getFromAddressSet() {
return fromAddressSet;
}
public Set<EMailer> getCcAddressSet() {
return ccAddressSet;
}
public Set<EMailer> getBccAddressSet() {
return bccAddressSet;
}
public Set<EMailer> getReplyToAddressSet() {
return replyToAddressSet;
}
public Set<String> getSubjectSet() {
return subjectSet;
}
public Optional<ZonedDateTime> getSentDate() {
return sentDate;
}
public Multimap<String, String> getHeaders() {
return headers;
}
}