blob: cc1ae4e884d1dc1a662b8d128652972a8cb577f0 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.sax;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.CountingInputStream;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
/**
* Content handler decorator that attempts to prevent denial of service
* attacks against Tika parsers.
* <p>
* Currently this class simply compares the number of output characters
* to the number of input bytes, and throws an exception if the output
* is truly excessive when compared to the input. This is a strong indication
* of a zip bomb.
*
* @since Apache Tika 0.4
* @see <a href="https://issues.apache.org/jira/browse/TIKA-216">TIKA-216</a>
*/
public class SecureContentHandler extends ContentHandlerDecorator {

    /**
     * The input stream that Tika is parsing. Its byte count is the
     * "input size" that the produced output is compared against.
     */
    private final CountingInputStream stream;

    /**
     * Number of output characters (UTF-16 code units) that have been
     * reported through {@link #characters(char[], int, int)} and
     * {@link #ignorableWhitespace(char[], int, int)} so far.
     */
    private long characterCount = 0;

    /**
     * Output threshold. The ratio check is skipped until more than this
     * many output characters have been recorded.
     */
    private long threshold = 1000000;

    /**
     * Maximum allowed ratio of output characters to input bytes.
     */
    private long ratio = 100;

    /**
     * Decorates the given content handler with zip bomb prevention based
     * on the count of bytes read from the given counting input stream.
     * The resulting decorator can be passed to a Tika parser along with
     * the given counting input stream.
     *
     * @param handler the content handler to be decorated
     * @param stream the input stream to be parsed, wrapped into
     *               a {@link CountingInputStream} decorator
     */
    public SecureContentHandler(
            ContentHandler handler, CountingInputStream stream) {
        super(handler);
        this.stream = stream;
    }

    /**
     * Returns the configured output threshold.
     *
     * @return output threshold
     */
    public long getOutputThreshold() {
        return threshold;
    }

    /**
     * Sets the threshold for output characters before the zip bomb
     * prevention is activated. This avoids false positives in cases where
     * an otherwise normal document for some reason starts with a highly
     * compressible sequence of bytes.
     *
     * @param threshold new output threshold
     */
    public void setOutputThreshold(long threshold) {
        this.threshold = threshold;
    }

    /**
     * Returns the maximum compression ratio.
     *
     * @return maximum compression ratio
     */
    public long getMaximumCompressionRatio() {
        return ratio;
    }

    /**
     * Sets the ratio between output characters and input bytes. If this
     * ratio is exceeded (after the output threshold has been reached) then
     * an exception gets thrown.
     *
     * @param ratio new maximum compression ratio
     */
    public void setMaximumCompressionRatio(long ratio) {
        this.ratio = ratio;
    }

    /**
     * Converts the given {@link SAXException} to a corresponding
     * {@link TikaException} if it's caused by this instance detecting
     * a zip bomb. The given exception itself and any chain of nested
     * {@link SAXException} causes are inspected, so the zip bomb signal
     * is still recognized after another component has wrapped it.
     * Does nothing if no matching exception is found.
     *
     * @param e SAX exception
     * @throws TikaException zip bomb exception
     */
    public void throwIfCauseOf(SAXException e) throws TikaException {
        Throwable current = e;
        while (current instanceof SAXException) {
            if (current instanceof SecureSAXException
                    && ((SecureSAXException) current).isCausedBy(this)) {
                // Keep the outermost exception as the cause so that no
                // wrapping context is lost.
                throw new TikaException("Zip bomb detected!", e);
            }
            Throwable cause = current.getCause();
            if (cause == current) {
                // Defensive: never spin on a self-referencing cause.
                break;
            }
            current = cause;
        }
    }

    /**
     * Records the given number of output characters (or more accurately
     * UTF-16 code units). Throws an exception if the recorded number of
     * characters highly exceeds the number of input bytes read.
     *
     * @param length number of new output characters produced
     * @throws SAXException if a zip bomb is detected
     */
    private void advance(int length) throws SAXException {
        characterCount += length;
        if (characterCount > threshold && exceedsMaximumRatio()) {
            throw new SecureSAXException();
        }
    }

    /**
     * Checks whether the recorded output character count is larger than
     * the number of input bytes read so far multiplied by the maximum
     * compression ratio.
     * <p>
     * The multiplication is guarded against <code>long</code> overflow:
     * with a very large configured ratio the plain product would wrap
     * around and make ordinary documents look like zip bombs.
     *
     * @return <code>true</code> if the maximum ratio has been exceeded
     */
    private boolean exceedsMaximumRatio() {
        long byteCount = stream.getByteCount();
        if (byteCount > 0 && ratio > Long.MAX_VALUE / byteCount) {
            // The exact product is larger than Long.MAX_VALUE, which no
            // long character count can exceed.
            return false;
        }
        return characterCount > byteCount * ratio;
    }

    /**
     * Counts the reported characters towards the output total and then
     * forwards the event to the decorated handler.
     *
     * @throws SAXException if a zip bomb is detected, or if thrown by
     *                      the superclass implementation
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        advance(length);
        super.characters(ch, start, length);
    }

    /**
     * Counts the reported whitespace towards the output total and then
     * forwards the event to the decorated handler.
     *
     * @throws SAXException if a zip bomb is detected, or if thrown by
     *                      the superclass implementation
     */
    @Override
    public void ignorableWhitespace(char[] ch, int start, int length)
            throws SAXException {
        advance(length);
        super.ignorableWhitespace(ch, start, length);
    }

    /**
     * Private exception class used to indicate a suspected zip bomb.
     * It is an inner (non-static) class on purpose: the reference to the
     * enclosing handler is what lets {@link #isCausedBy} tell which
     * handler instance raised the exception.
     *
     * @see SecureContentHandler#throwIfCauseOf(SAXException)
     */
    private class SecureSAXException extends SAXException {

        /**
         * Creates the exception with a message that records the input
         * byte count and output character count at the time of detection.
         */
        public SecureSAXException() {
            super("Suspected zip bomb: "
                    + stream.getByteCount() + " input bytes produced "
                    + characterCount + " output characters");
        }

        /**
         * Checks whether this exception was raised by the given handler.
         *
         * @param handler the handler to compare against
         * @return <code>true</code> if the given handler is the very
         *         instance that created this exception
         */
        public boolean isCausedBy(SecureContentHandler handler) {
            return SecureContentHandler.this == handler;
        }

    }

}