blob: bf4ebe10d499bd80abd628ac14cdc08b31733758 [file] [log] [blame]
/****************************************************************
* Licensed to the Apache Software Foundation (ASF) under one *
* or more contributor license agreements. See the NOTICE file *
* distributed with this work for additional information *
* regarding copyright ownership. The ASF licenses this file *
* to you under the Apache License, Version 2.0 (the *
* "License"); you may not use this file except in compliance *
* with the License. You may obtain a copy of the License at *
* *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, *
* software distributed under the License is distributed on an *
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
* KIND, either express or implied. See the License for the *
* specific language governing permissions and limitations *
* under the License. *
****************************************************************/
package org.apache.james.mailbox.opensearch.json;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.util.Deque;
import java.util.LinkedList;
import java.util.Optional;

import org.apache.james.mailbox.extractor.TextExtractor;
import org.apache.james.mailbox.model.ContentType.MediaType;
import org.apache.james.mailbox.model.ContentType.SubType;
import org.apache.james.mailbox.store.mail.model.Message;
import org.apache.james.mime4j.MimeException;
import org.apache.james.mime4j.codec.DecodeMonitor;
import org.apache.james.mime4j.field.LenientFieldParser;
import org.apache.james.mime4j.message.DefaultBodyDescriptorBuilder;
import org.apache.james.mime4j.message.MaximalBodyDescriptor;
import org.apache.james.mime4j.stream.EntityState;
import org.apache.james.mime4j.stream.MimeConfig;
import org.apache.james.mime4j.stream.MimeTokenStream;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Preconditions;

/**
 * Walks the MIME structure of a {@link Message} with a mime4j {@link MimeTokenStream} and
 * assembles it into a tree of {@link MimePart.ParsedMimePart}.
 *
 * <p>Builders of entities that are still open (multiparts and embedded messages) are kept on a
 * stack. Every finished part is attached as a child of the builder on top of that stack; the
 * part that is finished while the stack is empty becomes the result of {@link #parse()}.
 *
 * <p>The instance keeps mutable parsing state and remembers the result it produced: recording a
 * second result fails the state check in {@code closeMimePart()}, so an instance is meant to
 * serve a single {@link #parse()} call.
 */
public class MimePartParser {
    private static final Logger LOGGER = LoggerFactory.getLogger(MimePartParser.class);
    private static final LenientFieldParser FIELD_PARSER = new LenientFieldParser();

    private final Message message;
    private final TextExtractor textExtractor;
    private final MimeTokenStream stream;
    /** Builders of the entities that are currently open, innermost one on top. */
    private final Deque<MimePartContainerBuilder> builderStack;
    /** Root of the parsed tree; stays {@code null} until the outermost entity has been closed. */
    private MimePart.ParsedMimePart result;
    /** Builder receiving headers and body of the entity currently being read. */
    private MimePartContainerBuilder currentlyBuildMimePart;

    /**
     * Creates a parser for the given message.
     *
     * @param message       message whose full content will be tokenized by {@link #parse()}
     * @param textExtractor extractor whose {@code applicable} predicate is handed to every part builder
     */
    public MimePartParser(Message message, TextExtractor textExtractor) {
        this.message = message;
        this.textExtractor = textExtractor;
        this.builderStack = new LinkedList<>();
        // The root container is the initial "current" builder; it is stacked by the first start token.
        this.currentlyBuildMimePart = new RootMimePartContainerBuilder();
        this.stream = new MimeTokenStream(
            MimeConfig.PERMISSIVE,
            new DefaultBodyDescriptorBuilder(null, FIELD_PARSER, DecodeMonitor.SILENT));
    }

    /**
     * Tokenizes the full content of the message and builds the corresponding part tree.
     *
     * @return the part built once the builder stack was emptied, or {@code null} if no such part
     *         was produced
     * @throws IOException   if the message content cannot be opened, read or closed
     * @throws MimeException if mime4j fails while tokenizing the content
     */
    public MimePart.ParsedMimePart parse() throws IOException, MimeException {
        // Nothing else in this class closes the content stream, so release it here,
        // on normal completion as well as when tokenizing fails.
        try (InputStream content = message.getFullContent()) {
            stream.parse(content);
            for (EntityState state = stream.getState(); state != EntityState.T_END_OF_STREAM; state = stream.next()) {
                processMimePart(stream, state);
            }
        }
        return result;
    }

    /**
     * Updates the builder stack and the current builder according to one tokenizer state.
     * States that are not listed are ignored.
     *
     * @param stream token stream positioned on {@code state}
     * @param state  state the token stream currently reports
     */
    private void processMimePart(MimeTokenStream stream, EntityState state) {
        switch (state) {
            // A container opens: park the current builder so its children can be attached to it.
            case T_START_MULTIPART:
            case T_START_MESSAGE:
                stackCurrent();
                break;
            // A new entity begins: start a fresh builder for it.
            case T_START_HEADER:
                currentlyBuildMimePart = MimePart.builder(textExtractor::applicable);
                break;
            case T_FIELD:
                currentlyBuildMimePart.addToHeaders(stream.getField());
                break;
            // A body was reached: record its description and content, then finish the part.
            case T_BODY:
                manageBodyExtraction(stream);
                closeMimePart();
                break;
            // A container closes: its builder becomes current again and is finished.
            case T_END_MULTIPART:
            case T_END_MESSAGE:
                unstackToCurrent();
                closeMimePart();
                break;
            default:
                break;
        }
    }

    /** Pushes the current builder on the stack and leaves no current builder. */
    private void stackCurrent() {
        builderStack.push(currentlyBuildMimePart);
        currentlyBuildMimePart = null;
    }

    /** Pops the top of the stack and makes it the current builder. */
    private void unstackToCurrent() {
        currentlyBuildMimePart = builderStack.pop();
    }

    /**
     * Builds the current part and attaches it to the builder on top of the stack; when the stack
     * is empty the built part is stored as the result instead.
     *
     * @throws IllegalStateException if a result has already been stored
     */
    private void closeMimePart() {
        MimePart.ParsedMimePart bodyMimePart = currentlyBuildMimePart.build();
        if (!builderStack.isEmpty()) {
            builderStack.peek().addChild(bodyMimePart);
        } else {
            Preconditions.checkState(result == null);
            result = bodyMimePart;
        }
    }

    /** Feeds the body descriptor data and the decoded body stream to the current builder. */
    private void manageBodyExtraction(MimeTokenStream stream) {
        extractMimePartBodyDescription(stream);
        currentlyBuildMimePart.addBodyContent(stream.getDecodedInputStream());
    }

    /**
     * Copies media type, sub type, content disposition, file name and charset from the body
     * descriptor of the token stream to the current builder. Media type and sub type are only
     * set when the descriptor provides them.
     */
    private void extractMimePartBodyDescription(MimeTokenStream stream) {
        MaximalBodyDescriptor descriptor = (MaximalBodyDescriptor) stream.getBodyDescriptor();

        Optional.ofNullable(descriptor.getMediaType())
            .map(MediaType::of)
            .ifPresent(currentlyBuildMimePart::addMediaType);
        Optional.ofNullable(descriptor.getSubType())
            .map(SubType::of)
            .ifPresent(currentlyBuildMimePart::addSubType);
        currentlyBuildMimePart.addContentDisposition(descriptor.getContentDispositionType())
            .addFileName(descriptor.getContentDispositionFilename());
        extractCharset(descriptor);
    }

    /**
     * Sets the charset of the current builder from the descriptor, when one is declared.
     * Best effort: a failure (for instance a charset name {@link Charset#forName(String)}
     * rejects) is logged and the part is simply left without a charset.
     */
    private void extractCharset(MaximalBodyDescriptor descriptor) {
        try {
            Optional.ofNullable(descriptor.getCharset())
                .map(Charset::forName)
                .ifPresent(currentlyBuildMimePart::charset);
        } catch (Exception e) {
            LOGGER.info("Failed parsing charset", e);
        }
    }
}