ORC-1834: [C++] Fix undefined behavior ### What changes were proposed in this pull request? Unaligned reads are UB in C++, and memcpy-ing zero bytes is UB as well when a pointer argument is null or otherwise invalid. ### How was this patch tested? An internal UBSan report was used to detect and fix this bug. Closes #2112 from georgthegreat/patch-3. Authored-by: Yuriy Chernyshov <thegeorg@yandex-team.com> Signed-off-by: Gang Wu <ustcwg@gmail.com>
diff --git a/c++/src/ColumnReader.cc b/c++/src/ColumnReader.cc index e70f916..af434c3 100644 --- a/c++/src/ColumnReader.cc +++ b/c++/src/ColumnReader.cc
@@ -395,7 +395,7 @@ int64_t bits = 0; if (bufferEnd_ - bufferPointer_ >= 8) { if (isLittleEndian) { - bits = *(reinterpret_cast<const int64_t*>(bufferPointer_)); + memcpy(&bits, bufferPointer_, sizeof(bits)); } else { bits = static_cast<int64_t>(static_cast<unsigned char>(bufferPointer_[0])); bits |= static_cast<int64_t>(static_cast<unsigned char>(bufferPointer_[1])) << 8; @@ -509,8 +509,10 @@ bufferNum = std::min(numValues, static_cast<size_t>(bufferEnd_ - bufferPointer_) / bytesPerValue_); uint64_t bufferBytes = bufferNum * bytesPerValue_; - memcpy(outArray, bufferPointer_, bufferBytes); - bufferPointer_ += bufferBytes; + if (bufferBytes > 0) { + memcpy(outArray, bufferPointer_, bufferBytes); + bufferPointer_ += bufferBytes; + } } for (size_t i = bufferNum; i < numValues; ++i) { outArray[i] = readDouble<ValueType>();