ARROW-2227: [Python] Fix off-by-one error in chunked binary conversions

We were already testing the chunked behavior, but we did not exercise the off-by-one error edge case.

Author: Wes McKinney <wes.mckinney@twosigma.com>

Closes #1740 from wesm/ARROW-2227 and squashes the following commits:

fd1ab178 <Wes McKinney> Move constexpr to builder.h, add constexpr for max # of list elements
ca5ecc0f <Wes McKinney> Fix off-by-one error in chunked binary conversions
diff --git a/cpp/src/arrow/builder.cc b/cpp/src/arrow/builder.cc
index ef4e7fd..aa9f3ce 100644
--- a/cpp/src/arrow/builder.cc
+++ b/cpp/src/arrow/builder.cc
@@ -1236,7 +1236,7 @@
 
 Status ListBuilder::AppendNextOffset() {
   int64_t num_values = value_builder_->length();
-  if (ARROW_PREDICT_FALSE(num_values >= std::numeric_limits<int32_t>::max())) {
+  if (ARROW_PREDICT_FALSE(num_values > kListMaximumElements)) {
     std::stringstream ss;
     ss << "ListArray cannot contain more then INT32_MAX - 1 child elements,"
        << " have " << num_values;
@@ -1252,14 +1252,14 @@
 }
 
 Status ListBuilder::Init(int64_t elements) {
-  DCHECK_LT(elements, std::numeric_limits<int32_t>::max());
+  DCHECK_LE(elements, kListMaximumElements);
   RETURN_NOT_OK(ArrayBuilder::Init(elements));
   // one more then requested for offsets
   return offsets_builder_.Resize((elements + 1) * sizeof(int32_t));
 }
 
 Status ListBuilder::Resize(int64_t capacity) {
-  DCHECK_LT(capacity, std::numeric_limits<int32_t>::max());
+  DCHECK_LE(capacity, kListMaximumElements);
   // one more then requested for offsets
   RETURN_NOT_OK(offsets_builder_.Resize((capacity + 1) * sizeof(int32_t)));
   return ArrayBuilder::Resize(capacity);
@@ -1303,14 +1303,14 @@
 BinaryBuilder::BinaryBuilder(MemoryPool* pool) : BinaryBuilder(binary(), pool) {}
 
 Status BinaryBuilder::Init(int64_t elements) {
-  DCHECK_LT(elements, std::numeric_limits<int32_t>::max());
+  DCHECK_LE(elements, kListMaximumElements);
   RETURN_NOT_OK(ArrayBuilder::Init(elements));
   // one more then requested for offsets
   return offsets_builder_.Resize((elements + 1) * sizeof(int32_t));
 }
 
 Status BinaryBuilder::Resize(int64_t capacity) {
-  DCHECK_LT(capacity, std::numeric_limits<int32_t>::max());
+  DCHECK_LE(capacity, kListMaximumElements);
   // one more then requested for offsets
   RETURN_NOT_OK(offsets_builder_.Resize((capacity + 1) * sizeof(int32_t)));
   return ArrayBuilder::Resize(capacity);
@@ -1318,7 +1318,7 @@
 
 Status BinaryBuilder::ReserveData(int64_t elements) {
   if (value_data_length() + elements > value_data_capacity()) {
-    if (value_data_length() + elements > std::numeric_limits<int32_t>::max()) {
+    if (value_data_length() + elements > kBinaryMemoryLimit) {
       return Status::Invalid("Cannot reserve capacity larger than 2^31 - 1 for binary");
     }
     RETURN_NOT_OK(value_data_builder_.Reserve(elements));
@@ -1328,9 +1328,9 @@
 
 Status BinaryBuilder::AppendNextOffset() {
   const int64_t num_bytes = value_data_builder_.length();
-  if (ARROW_PREDICT_FALSE(num_bytes > kMaximumCapacity)) {
+  if (ARROW_PREDICT_FALSE(num_bytes > kBinaryMemoryLimit)) {
     std::stringstream ss;
-    ss << "BinaryArray cannot contain more than " << kMaximumCapacity << " bytes, have "
+    ss << "BinaryArray cannot contain more than " << kBinaryMemoryLimit << " bytes, have "
        << num_bytes;
     return Status::Invalid(ss.str());
   }
diff --git a/cpp/src/arrow/builder.h b/cpp/src/arrow/builder.h
index dabfb75..cdcee80 100644
--- a/cpp/src/arrow/builder.h
+++ b/cpp/src/arrow/builder.h
@@ -41,13 +41,16 @@
 class Array;
 class Decimal128;
 
+constexpr int64_t kBinaryMemoryLimit = std::numeric_limits<int32_t>::max() - 1;
+constexpr int64_t kListMaximumElements = std::numeric_limits<int32_t>::max() - 1;
+
 namespace internal {
 
 struct ArrayData;
 
 }  // namespace internal
 
-static constexpr int64_t kMinBuilderCapacity = 1 << 5;
+constexpr int64_t kMinBuilderCapacity = 1 << 5;
 
 /// Base class for all data array builders.
 //
@@ -702,8 +705,6 @@
   TypedBufferBuilder<int32_t> offsets_builder_;
   TypedBufferBuilder<uint8_t> value_data_builder_;
 
-  static constexpr int64_t kMaximumCapacity = std::numeric_limits<int32_t>::max() - 1;
-
   Status AppendNextOffset();
   void Reset();
 };
diff --git a/cpp/src/arrow/python/numpy_to_arrow.cc b/cpp/src/arrow/python/numpy_to_arrow.cc
index 4d91e53..71bf69f 100644
--- a/cpp/src/arrow/python/numpy_to_arrow.cc
+++ b/cpp/src/arrow/python/numpy_to_arrow.cc
@@ -60,8 +60,6 @@
 
 using internal::NumPyTypeSize;
 
-constexpr int64_t kBinaryMemoryLimit = std::numeric_limits<int32_t>::max();
-
 // ----------------------------------------------------------------------
 // Conversion utilities
 
diff --git a/python/pyarrow/tests/test_convert_pandas.py b/python/pyarrow/tests/test_convert_pandas.py
index dff4e10..d448de0 100644
--- a/python/pyarrow/tests/test_convert_pandas.py
+++ b/python/pyarrow/tests/test_convert_pandas.py
@@ -1048,9 +1048,12 @@
 
     @pytest.mark.large_memory
     def test_bytes_exceed_2gb(self):
-        val = 'x' * (1 << 20)
+        v1 = b'x' * 100000000
+        v2 = b'x' * 147483646
+
+        # ARROW-2227, hit exactly 2GB on the nose
         df = pd.DataFrame({
-            'strings': np.array([val] * 4000, dtype=object)
+            'strings': [v1] * 20 + [v2] + ['x'] * 20
         })
         arr = pa.array(df['strings'])
         assert isinstance(arr, pa.ChunkedArray)