Add sanity check for TextRegion with excessive number of symbol instances
diff --git a/src/main/java/org/apache/pdfbox/jbig2/segments/TextRegion.java b/src/main/java/org/apache/pdfbox/jbig2/segments/TextRegion.java
index 0e11bc8..200b036 100644
--- a/src/main/java/org/apache/pdfbox/jbig2/segments/TextRegion.java
+++ b/src/main/java/org/apache/pdfbox/jbig2/segments/TextRegion.java
@@ -32,8 +32,8 @@
import org.apache.pdfbox.jbig2.decoder.huffman.EncodedTable;
import org.apache.pdfbox.jbig2.decoder.huffman.FixedSizeTable;
import org.apache.pdfbox.jbig2.decoder.huffman.HuffmanTable;
-import org.apache.pdfbox.jbig2.decoder.huffman.StandardTables;
import org.apache.pdfbox.jbig2.decoder.huffman.HuffmanTable.Code;
+import org.apache.pdfbox.jbig2.decoder.huffman.StandardTables;
import org.apache.pdfbox.jbig2.err.IntegerMaxValueException;
import org.apache.pdfbox.jbig2.err.InvalidHeaderValueException;
import org.apache.pdfbox.jbig2.image.Bitmaps;
@@ -239,6 +239,14 @@
private void readAmountOfSymbolInstances() throws IOException {
amountOfSymbolInstances = subInputStream.readBits(32) & 0xffffffff;
+
+ // sanity check: the count comes straight from the stream, so cap it at one symbol instance per region pixel
+ long pixels = (long) regionInfo.getBitmapWidth() * (long) regionInfo.getBitmapHeight();
+ if (pixels < amountOfSymbolInstances) {
+ log.warn("Limiting number of decoded symbol instances to one per pixel (" + pixels + " instead of "
+ + amountOfSymbolInstances + ")");
+ amountOfSymbolInstances = pixels;
+ }
}
private void getSymbols() throws IOException, IntegerMaxValueException, InvalidHeaderValueException {
@@ -394,7 +402,7 @@
/* Last two sentences of 6.4.5 2) */
long firstS = 0;
- int instanceCounter = 0;
+ long instanceCounter = 0;
/* 6.4.5 3 a) */
while (instanceCounter < amountOfSymbolInstances) {
@@ -422,12 +430,11 @@
final long idS = decodeIdS();
/*
- * If result is OOB, then all the symbol instances in this strip have been decoded;
- * proceed to step 3 d) respectively 3 b). Also exit, if the expected number of
- * instances have been decoded.
+ * If result is OOB, then all the symbol instances in this strip have been decoded; proceed to step
+ * 3 d) or 3 b). Also exit if the expected number of instances has been decoded.
*
- * The latter exit condition guards against pathological cases where a strip's
- * S never contains OOB and thus never terminates as illustrated in
+ * The latter exit condition guards against pathological cases where a strip's S never contains OOB
+ * and thus never terminates as illustrated in
* https://bugs.chromium.org/p/chromium/issues/detail?id=450971 case pdfium-loop2.pdf.
*/
if (idS == Long.MAX_VALUE || instanceCounter >= amountOfSymbolInstances)