blob: 1b252e7c988d66227c18ce8a45398707b2c22e79 [file] [log] [blame]
/****************************************************************
* Licensed to the Apache Software Foundation (ASF) under one *
* or more contributor license agreements. See the NOTICE file *
* distributed with this work for additional information *
* regarding copyright ownership. The ASF licenses this file *
* to you under the Apache License, Version 2.0 (the *
* "License"); you may not use this file except in compliance *
* with the License. You may obtain a copy of the License at *
* *
* http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, *
* software distributed under the License is distributed on an *
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY *
* KIND, either express or implied. See the License for the *
* specific language governing permissions and limitations *
* under the License. *
****************************************************************/
package org.apache.james.mime4j.stream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.LinkedList;
import org.apache.james.mime4j.MimeException;
import org.apache.james.mime4j.codec.DecodeMonitor;
import org.apache.james.mime4j.io.LineNumberInputStream;
import org.apache.james.mime4j.io.LineNumberSource;
import org.apache.james.mime4j.util.CharsetUtil;
/**
 * <p>
 * Parses MIME (or RFC822) message streams.
 * The stream is converted into a stream of tokens (states) that the caller
 * pulls one at a time with {@link #next()}.
 * </p>
 * <p>
 * Typical usage:
 * </p>
 * <pre>
 *      BasicMimeTokenStream stream = new BasicMimeTokenStream();
 *      stream.parse(new FileInputStream("mime.msg"));
 *      for (int state = stream.getState();
 *           state != BasicMimeTokenStream.T_END_OF_STREAM;
 *           state = stream.next()) {
 *          switch (state) {
 *            case BasicMimeTokenStream.T_BODY:
 *              System.out.println("Body detected, contents = "
 *                + stream.getInputStream() + ", header data = "
 *                + stream.getBodyDescriptor());
 *              break;
 *            case BasicMimeTokenStream.T_FIELD:
 *              System.out.println("Header field detected: "
 *                + stream.getField());
 *              break;
 *            case BasicMimeTokenStream.T_START_MULTIPART:
 *              System.out.println("Multipart message detected,"
 *                + " header data = "
 *                + stream.getBodyDescriptor());
 *            ...
 *          }
 *      }
 * </pre>
 * <p>Instances of {@link BasicMimeTokenStream} are reusable: invoking
 * {@link #parse(InputStream)} resets the token stream's internal state.
 * However, they are definitely <em>not</em> thread safe (all parser state
 * is held in unsynchronized fields). In a multi-threaded application the
 * suggested use is one instance per thread.</p>
 */
public class BasicMimeTokenStream implements EntityStates, RecursionMode {

    private final MimeEntityConfig config;
    private final DecodeMonitor monitor;

    /**
     * Stack of the entities currently being parsed; the last element is the
     * innermost one and is always the same object as
     * {@link #currentStateMachine} while parsing is in progress.
     */
    private final LinkedList<EntityStateMachine> entities = new LinkedList<EntityStateMachine>();

    private int state = T_END_OF_STREAM;
    private EntityStateMachine currentStateMachine;
    private int recursionMode = M_RECURSE;
    private MimeEntity rootentity;

    /**
     * Constructs a stream with a default {@link MimeEntityConfig}.
     * To obtain different (for example strict) behaviour, pass a suitably
     * configured {@link MimeEntityConfig} to
     * {@link #BasicMimeTokenStream(MimeEntityConfig)}.
     */
    public BasicMimeTokenStream() {
        this(new MimeEntityConfig());
    }

    /**
     * Constructs a stream using the given configuration and a decode monitor
     * derived from it.
     *
     * @param config parser configuration
     */
    public BasicMimeTokenStream(final MimeEntityConfig config) {
        this(config, null);
    }

    /**
     * Constructs a stream using the given configuration and decode monitor.
     *
     * @param config parser configuration
     * @param monitor decode monitor to use; when <code>null</code>,
     *        {@link DecodeMonitor#STRICT} is used if the configuration
     *        requests strict parsing and {@link DecodeMonitor#SILENT}
     *        otherwise
     */
    public BasicMimeTokenStream(final MimeEntityConfig config, DecodeMonitor monitor) {
        this.config = config;
        if (monitor != null) {
            this.monitor = monitor;
        } else {
            this.monitor = config.isStrictParsing() ? DecodeMonitor.STRICT : DecodeMonitor.SILENT;
        }
    }

    /**
     * Instructs this token stream to parse the given stream's contents.
     * If this token stream has already been in use, its internal state
     * is reset.
     *
     * @param stream the message to parse
     */
    public void parse(InputStream stream) {
        doParse(stream, null);
    }

    /**
     * Instructs this token stream to parse the given content with the given
     * content type. The message stream is assumed to have no message header
     * and is expected to begin with a message body. This can be the case when
     * the message content is transmitted using a different transport protocol
     * such as HTTP.
     * <p>
     * If this token stream has already been in use, its internal state
     * is reset.
     * </p>
     *
     * @param stream the header-less message content
     * @param contentType value of the <code>Content-Type</code> field that
     *        describes the content
     * @throws IllegalArgumentException if <code>contentType</code> is
     *         <code>null</code>
     */
    public void parseHeadless(InputStream stream, String contentType) {
        if (contentType == null) {
            throw new IllegalArgumentException("Content type may not be null");
        }
        doParse(stream, contentType);
    }

    /**
     * Starts parsing with a fresh body descriptor. When a content type is
     * given, it is recorded as a <code>Content-Type</code> field on the
     * descriptor and parsing starts in the {@link #T_END_HEADER} state
     * (that is, the header is considered already consumed); otherwise parsing
     * starts in {@link #T_START_MESSAGE}.
     *
     * @param stream the content to parse
     * @param contentType content type of a header-less message, or
     *        <code>null</code> for a regular message
     */
    protected void doParse(InputStream stream, String contentType) {
        MutableBodyDescriptor newBodyDescriptor = newBodyDescriptor();
        int start = T_START_MESSAGE;
        if (contentType != null) {
            start = T_END_HEADER;
            newBodyDescriptor.addField(new RawField("Content-Type", contentType));
        }
        doParse(stream, newBodyDescriptor, start);
    }

    /**
     * Creates the body descriptor used for the root entity of a new parse.
     * Subclasses may override this to supply a different descriptor type.
     *
     * @return a new {@link DefaultBodyDescriptor} without a parent
     */
    protected MutableBodyDescriptor newBodyDescriptor() {
        return new DefaultBodyDescriptor(null);
    }

    /**
     * Resets this token stream and starts parsing the given stream as the
     * root entity.
     *
     * @param stream the content to parse
     * @param newBodyDescriptor body descriptor for the root entity
     * @param start initial state of the root entity
     */
    public void doParse(InputStream stream,
            MutableBodyDescriptor newBodyDescriptor, int start) {
        LineNumberSource lineSource = null;
        if (config.isCountLineNumbers()) {
            // Wrap the input so the same object both feeds the parser and
            // reports line numbers.
            LineNumberInputStream lineInput = new LineNumberInputStream(stream);
            lineSource = lineInput;
            stream = lineInput;
        }

        rootentity = new MimeEntity(
                lineSource,
                stream,
                newBodyDescriptor,
                start,
                T_END_MESSAGE,
                config,
                monitor);

        rootentity.setRecursionMode(recursionMode);
        currentStateMachine = rootentity;
        // Discard whatever was left over from a previous parse.
        entities.clear();
        entities.add(currentStateMachine);
        state = currentStateMachine.getState();
    }

    /**
     * Determines if this parser is currently in raw mode.
     *
     * @return <code>true</code> if in raw mode, <code>false</code>
     *         otherwise.
     * @see #setRecursionMode(int)
     */
    public boolean isRaw() {
        return recursionMode == M_RAW;
    }

    /**
     * Gets the current recursion mode.
     * The recursion mode specifies the approach taken to parsing parts.
     * {@link #M_RAW} mode does not parse the part at all.
     * {@link #M_RECURSE} mode recursively parses each mail
     * when a <code>message/rfc822</code> part is encountered;
     * {@link #M_NO_RECURSE} does not.
     *
     * @return {@link #M_RECURSE}, {@link #M_RAW} or {@link #M_NO_RECURSE}
     */
    public int getRecursionMode() {
        return recursionMode;
    }

    /**
     * Sets the current recursion mode.
     * The recursion mode specifies the approach taken to parsing parts.
     * {@link #M_RAW} mode does not parse the part at all.
     * {@link #M_RECURSE} mode recursively parses each mail
     * when a <code>message/rfc822</code> part is encountered;
     * {@link #M_NO_RECURSE} does not.
     * <p>
     * The mode is remembered for subsequent parses and is also applied
     * immediately to the entity currently being parsed, if any.
     * </p>
     *
     * @param mode {@link #M_RECURSE}, {@link #M_RAW} or {@link #M_NO_RECURSE}
     */
    public void setRecursionMode(int mode) {
        recursionMode = mode;
        if (currentStateMachine != null) {
            currentStateMachine.setRecursionMode(mode);
        }
    }

    /**
     * Finishes the parsing and stops reading lines by stopping the root
     * entity.
     * NOTE: according to the original contract of this method no more lines
     * will be parsed, but the tokens that close already started entities
     * (end of multipart, end of body part, end of message, etc.) are still
     * delivered so that they match the start tokens seen so far.
     * <p>
     * Calling this method before any parse has been started has no effect.
     * </p>
     */
    public void stop() {
        // rootentity is only assigned by doParse(); without this guard a
        // premature stop() failed with a NullPointerException.
        if (rootentity != null) {
            rootentity.stop();
        }
    }

    /**
     * Returns the current state.
     *
     * @return the state of the token stream; {@link #T_END_OF_STREAM} before
     *         the first parse and after the last token has been consumed
     */
    public int getState() {
        return state;
    }

    /**
     * This method returns the raw entity, preamble, or epilogue contents.
     * <p>
     * This method is valid, if {@link #getState()} returns either of
     * {@link #T_RAW_ENTITY}, {@link #T_PREAMBLE}, or {@link #T_EPILOGUE}.
     * </p>
     *
     * @return Data stream, depending on the current state.
     * @throws IllegalStateException if no entity is currently being parsed,
     *         or {@link #getState()} returns an invalid value.
     */
    public InputStream getInputStream() {
        return requireCurrentEntity().getContentStream();
    }

    /**
     * This method returns a transfer decoded stream based on the MIME
     * fields with the standard defaults.
     * <p>
     * The call is delegated to the entity currently being parsed, which
     * decides in which states decoded content is available.
     * </p>
     *
     * @return Data stream, depending on the current state.
     * @throws IllegalStateException if no entity is currently being parsed,
     *         or {@link #getState()} returns an invalid value.
     */
    public InputStream getDecodedInputStream() {
        return requireCurrentEntity().getDecodedContentStream();
    }

    /**
     * Gets a reader configured for the current body or body part.
     * The reader will return a transfer and charset decoded
     * stream of characters based on the MIME fields with the standard
     * defaults. When the body descriptor reports no charset (<code>null</code>
     * or empty), US-ASCII is used.
     * This is a convenience method and relies on
     * {@link #getDecodedInputStream()}.
     * Consult the javadoc for that method for known limitations.
     *
     * @return <code>Reader</code>, not null
     * @see #getDecodedInputStream
     * @throws IllegalStateException if no entity is currently being parsed,
     *         or {@link #getState()} returns an invalid value
     * @throws UnsupportedCharsetException if there is no JVM support
     *         for decoding the charset
     * @throws IllegalCharsetNameException if the charset name specified
     *         in the mime type is illegal
     */
    public Reader getReader() {
        final BodyDescriptor bodyDescriptor = getBodyDescriptor();
        final String mimeCharset = bodyDescriptor.getCharset();
        final Charset charset;
        if (mimeCharset == null || "".equals(mimeCharset)) {
            charset = CharsetUtil.US_ASCII;
        } else {
            charset = Charset.forName(mimeCharset);
        }
        final InputStream instream = getDecodedInputStream();
        return new InputStreamReader(instream, charset);
    }

    /**
     * <p>Gets a descriptor for the current entity.
     * This method is valid if {@link #getState()} returns:</p>
     * <ul>
     * <li>{@link #T_BODY}</li>
     * <li>{@link #T_START_MULTIPART}</li>
     * <li>{@link #T_EPILOGUE}</li>
     * <li>{@link #T_PREAMBLE}</li>
     * </ul>
     *
     * @return <code>BodyDescriptor</code>, not null
     * @throws IllegalStateException if no entity is currently being parsed
     */
    public BodyDescriptor getBodyDescriptor() {
        return requireCurrentEntity().getBodyDescriptor();
    }

    /**
     * This method is valid, if {@link #getState()} returns {@link #T_FIELD}.
     *
     * @return the current header field in its raw form.
     * @throws IllegalStateException if no entity is currently being parsed,
     *         or {@link #getState()} returns another value than
     *         {@link #T_FIELD}.
     */
    public RawField getField() {
        return requireCurrentEntity().getField();
    }

    /**
     * This method advances the token stream to the next token.
     *
     * @return the new state, identical to what {@link #getState()} returns
     *         afterwards
     * @throws IllegalStateException The method has been called, although
     *         {@link #getState()} was already {@link #T_END_OF_STREAM}.
     * @throws IOException if advancing the current entity fails with an
     *         I/O error
     * @throws MimeException if advancing the current entity fails because
     *         of invalid MIME content
     */
    public int next() throws IOException, MimeException {
        if (state == T_END_OF_STREAM || currentStateMachine == null) {
            throw new IllegalStateException("No more tokens are available.");
        }
        while (currentStateMachine != null) {
            // advance() returns a nested entity when one starts; it then
            // becomes the innermost (current) entity.
            EntityStateMachine next = currentStateMachine.advance();
            if (next != null) {
                entities.add(next);
                currentStateMachine = next;
            }
            state = currentStateMachine.getState();
            if (state != T_END_OF_STREAM) {
                return state;
            }
            // The innermost entity is exhausted: drop it and continue with
            // its parent, re-applying the recursion mode in case it was
            // changed while the child was being parsed.
            entities.removeLast();
            if (entities.isEmpty()) {
                currentStateMachine = null;
            } else {
                currentStateMachine = entities.getLast();
                currentStateMachine.setRecursionMode(recursionMode);
            }
        }
        state = T_END_OF_STREAM;
        return state;
    }

    /**
     * Renders a state as a string suitable for logging.
     *
     * @param state the state to render
     * @return rendered as string, not null
     */
    public static final String stateToString(int state) {
        return AbstractEntity.stateToString(state);
    }

    /**
     * Returns the configuration this token stream was created with.
     *
     * @return the parser configuration
     */
    public MimeEntityConfig getConfig() {
        return config;
    }

    /**
     * Returns the entity currently being parsed, failing with the exception
     * type documented by the public accessors instead of a
     * {@link NullPointerException} when there is none (before the first
     * parse or after the end of the stream has been reached).
     */
    private EntityStateMachine requireCurrentEntity() {
        if (currentStateMachine == null) {
            throw new IllegalStateException(
                    "No entity is currently being parsed; state is " + state);
        }
        return currentStateMachine;
    }
}