IMPALA-14030: Fix buffer underflow when base64 decoding 0 length binaries
The issue didn't cause problems under normal circumstances but ASAN
tests caught it in JSON tests enabled in IMPALA-12927.
Changed text parsing logic to skip base64 decoding for empty binaries.
Also fixed Base64DecodeBufLen() with len=0 and added unit tests, though
this function is not used with len=0 outside BE tests.
Change-Id: I511cff8cec319d03d494a342f2cbb4a251cb893e
Reviewed-on: http://gerrit.cloudera.org:8080/22855
Reviewed-by: Riza Suminto <riza.suminto@cloudera.com>
Reviewed-by: Zoltan Borok-Nagy <boroknagyz@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
diff --git a/be/src/exec/text-converter.inline.h b/be/src/exec/text-converter.inline.h
index 6e14746..15df3e3 100644
--- a/be/src/exec/text-converter.inline.h
+++ b/be/src/exec/text-converter.inline.h
@@ -67,7 +67,7 @@
!(len != 0 && (copy_string || need_escape));
bool base64_decode = false;
- if (type.IsBinaryType() && decode_binary_) {
+ if (type.IsBinaryType() && decode_binary_ && len != 0) {
base64_decode = true;
reuse_data = false;
int64_t out_len;
diff --git a/be/src/util/coding-util-test.cc b/be/src/util/coding-util-test.cc
index 7914de1..8a2f855 100644
--- a/be/src/util/coding-util-test.cc
+++ b/be/src/util/coding-util-test.cc
@@ -144,6 +144,16 @@
TestBase64(string("a\0b\0", 4), "YQBiAA==");
}
+TEST(Base64Test, TinyLength) {
+ string str = "====";
+ int64_t len;
+ // Length of 1 should fail as it is not divisible with 4.
+ EXPECT_FALSE(Base64DecodeBufLen(str.data(), 1, &len));
+ // Length of 0 is valid and should return 0 (regression test for IMPALA-14030).
+ EXPECT_TRUE(Base64DecodeBufLen(str.data()+2, 0, &len));
+ EXPECT_EQ(len, 0);
+}
+
TEST(Base64Test, VariousInitialVariableValues) {
TestBase64EncodeWithInitialValues(0, 0);
TestBase64EncodeWithInitialValues(5, -10);
diff --git a/be/src/util/coding-util.cc b/be/src/util/coding-util.cc
index 7a7d1f6..f4578e4 100644
--- a/be/src/util/coding-util.cc
+++ b/be/src/util/coding-util.cc
@@ -191,10 +191,16 @@
// producing one fewer byte of output. This is repeated if the second-to-last character
// is '='. One more byte must be allocated to account for Base64Decode's null-padding
// of its output.
+ if (UNLIKELY(in_len == 0)) {
+ *out_max = 0;
+ return true;
+ }
if (UNLIKELY((in_len & 3) != 0)) return false;
*out_max = 1 + 3 * (in_len / 4);
+ DCHECK_GE(in_len, 1);
if (in[in_len - 1] == '=') {
--(*out_max);
+ DCHECK_GE(in_len, 2);
if (in[in_len - 2] == '=') {
--(*out_max);
}