IMPALA-14030: Fix buffer underflow when base64 decoding 0 length binaries

The issue didn't cause problems under normal circumstances but ASAN
tests caught it in JSON tests enabled in IMPALA-12927.

Changed text parsing logic to skip base64 decoding for empty binaries.

Also fixed Base64DecodeBufLen() with len=0 and added unit tests, though
this function is not used with len=0 outside BE tests.

Change-Id: I511cff8cec319d03d494a342f2cbb4a251cb893e
Reviewed-on: http://gerrit.cloudera.org:8080/22855
Reviewed-by: Riza Suminto <riza.suminto@cloudera.com>
Reviewed-by: Zoltan Borok-Nagy <boroknagyz@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
diff --git a/be/src/exec/text-converter.inline.h b/be/src/exec/text-converter.inline.h
index 6e14746..15df3e3 100644
--- a/be/src/exec/text-converter.inline.h
+++ b/be/src/exec/text-converter.inline.h
@@ -67,7 +67,7 @@
           !(len != 0 && (copy_string || need_escape));
 
       bool base64_decode = false;
-      if (type.IsBinaryType() && decode_binary_) {
+      if (type.IsBinaryType() && decode_binary_ && len != 0) {
         base64_decode = true;
         reuse_data = false;
         int64_t out_len;
diff --git a/be/src/util/coding-util-test.cc b/be/src/util/coding-util-test.cc
index 7914de1..8a2f855 100644
--- a/be/src/util/coding-util-test.cc
+++ b/be/src/util/coding-util-test.cc
@@ -144,6 +144,16 @@
   TestBase64(string("a\0b\0", 4), "YQBiAA==");
 }
 
+TEST(Base64Test, TinyLength) {
+  string str = "====";
+  int64_t len;
+  // Length of 1 should fail as it is not divisible with 4.
+  EXPECT_FALSE(Base64DecodeBufLen(str.data(), 1, &len));
+  // Length of 0 is valid and should return 0 (regression test for IMPALA-14030).
+  EXPECT_TRUE(Base64DecodeBufLen(str.data()+2, 0, &len));
+  EXPECT_EQ(len, 0);
+}
+
 TEST(Base64Test, VariousInitialVariableValues) {
   TestBase64EncodeWithInitialValues(0, 0);
   TestBase64EncodeWithInitialValues(5, -10);
diff --git a/be/src/util/coding-util.cc b/be/src/util/coding-util.cc
index 7a7d1f6..f4578e4 100644
--- a/be/src/util/coding-util.cc
+++ b/be/src/util/coding-util.cc
@@ -191,10 +191,16 @@
   // producing one fewer byte of output. This is repeated if the second-to-last character
   // is '='. One more byte must be allocated to account for Base64Decode's null-padding
   // of its output.
+  if (UNLIKELY(in_len == 0)) {
+    *out_max = 0;
+    return true;
+  }
   if (UNLIKELY((in_len & 3) != 0)) return false;
   *out_max = 1 + 3 * (in_len / 4);
+  DCHECK_GE(in_len, 1);
   if (in[in_len - 1] == '=') {
     --(*out_max);
+    DCHECK_GE(in_len, 2);
     if (in[in_len - 2] == '=') {
       --(*out_max);
     }