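ORC-591: [C++] Check for missing blob stream in StringDictionaryColumnReader
When the dictionary blob size is non-zero but the DICTIONARY_DATA stream is absent, throw a ParseError instead of passing a null stream to readFully. Adds a corrupt example file and a matching test case.
This fixes #471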
diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc
index e01a24b..016aed8 100644
--- a/c++/src/ColumnReader.cc
+++ b/c++/src/ColumnReader.cc
@@ -589,14 +589,15 @@
}
lengthArray[i] += lengthArray[i - 1];
}
- dictionary->dictionaryBlob.resize(
- static_cast<uint64_t>(lengthArray[dictSize]));
+ int64_t blobSize = lengthArray[dictSize];
+ dictionary->dictionaryBlob.resize(static_cast<uint64_t>(blobSize));
std::unique_ptr<SeekableInputStream> blobStream =
stripe.getStream(columnId, proto::Stream_Kind_DICTIONARY_DATA, false);
- readFully(
- dictionary->dictionaryBlob.data(),
- lengthArray[dictSize],
- blobStream.get());
+ if (blobSize > 0 && blobStream == nullptr) {
+ throw ParseError(
+ "DICTIONARY_DATA stream not found in StringDictionaryColumn");
+ }
+ readFully(dictionary->dictionaryBlob.data(), blobSize, blobStream.get());
}
StringDictionaryColumnReader::~StringDictionaryColumnReader() {
diff --git a/examples/corrupt/missing_blob_stream_in_string_dict.orc b/examples/corrupt/missing_blob_stream_in_string_dict.orc
new file mode 100644
index 0000000..1c7f742
--- /dev/null
+++ b/examples/corrupt/missing_blob_stream_in_string_dict.orc
Binary files differ
diff --git a/tools/test/TestFileScan.cc b/tools/test/TestFileScan.cc
index 8c783d7..de2f91d 100644
--- a/tools/test/TestFileScan.cc
+++ b/tools/test/TestFileScan.cc
@@ -149,7 +149,9 @@
checkForError(findExample("corrupt/stripe_footer_bad_column_encodings.orc"),
"bad number of ColumnEncodings in StripeFooter: expected=6, actual=0");
checkForError(findExample("corrupt/negative_dict_entry_lengths.orc"),
- "Negative dictionary entry length");
+ "Negative dictionary entry length");
checkForError(findExample("corrupt/missing_length_stream_in_string_dict.orc"),
- "LENGTH stream not found in StringDictionaryColumn");
+ "LENGTH stream not found in StringDictionaryColumn");
+ checkForError(findExample("corrupt/missing_blob_stream_in_string_dict.orc"),
+ "DICTIONARY_DATA stream not found in StringDictionaryColumn");
}