PARQUET-1008: [C++] TypedColumnReader::ReadBatch method updated to accept batch_size param of int64_t type

Author: Max Risuhin <risuhin.max@gmail.com>

Closes #349 from MaxRis/PARQUET-1008 and squashes the following commits:

9e0db07 [Max Risuhin] PARQUET-1008: [C++] TypedColumnReader::ReadBatch method updated to accept batch_size param of int64_t type
diff --git a/src/parquet/column/reader.cc b/src/parquet/column/reader.cc
index fe2de57..bc4e4a0 100644
--- a/src/parquet/column/reader.cc
+++ b/src/parquet/column/reader.cc
@@ -119,9 +119,9 @@
       // Levels are encoded as rle or bit-packed.
       // Init repetition levels
       if (descr_->max_repetition_level() > 0) {
-        int64_t rep_levels_bytes =
-            repetition_level_decoder_.SetData(page->repetition_level_encoding(),
-                descr_->max_repetition_level(), num_buffered_values_, buffer);
+        int64_t rep_levels_bytes = repetition_level_decoder_.SetData(
+            page->repetition_level_encoding(), descr_->max_repetition_level(),
+            static_cast<int>(num_buffered_values_), buffer);
         buffer += rep_levels_bytes;
         data_size -= rep_levels_bytes;
       }
@@ -130,9 +130,9 @@
 
       // Init definition levels
       if (descr_->max_definition_level() > 0) {
-        int64_t def_levels_bytes =
-            definition_level_decoder_.SetData(page->definition_level_encoding(),
-                descr_->max_definition_level(), num_buffered_values_, buffer);
+        int64_t def_levels_bytes = definition_level_decoder_.SetData(
+            page->definition_level_encoding(), descr_->max_definition_level(),
+            static_cast<int>(num_buffered_values_), buffer);
         buffer += def_levels_bytes;
         data_size -= def_levels_bytes;
       }
@@ -170,7 +170,7 @@
         }
       }
       current_decoder_->SetData(
-          num_buffered_values_, buffer, static_cast<int>(data_size));
+          static_cast<int>(num_buffered_values_), buffer, static_cast<int>(data_size));
       return true;
     } else {
       // We don't know what this page type is. We're allowed to skip non-data
diff --git a/src/parquet/column/reader.h b/src/parquet/column/reader.h
index 80084b2..f36db5e 100644
--- a/src/parquet/column/reader.h
+++ b/src/parquet/column/reader.h
@@ -91,11 +91,11 @@
   // values. For repeated or optional values, there may be fewer data values
   // than levels, and this tells you how many encoded levels there are in that
   // case.
-  int num_buffered_values_;
+  int64_t num_buffered_values_;
 
   // The number of values from the current data page that have been decoded
   // into memory
-  int num_decoded_values_;
+  int64_t num_decoded_values_;
 
   ::arrow::MemoryPool* pool_;
 };
@@ -128,8 +128,8 @@
   // This API is the same for both V1 and V2 of the DataPage
   //
   // @returns: actual number of levels read (see values_read for number of values read)
-  int64_t ReadBatch(int batch_size, int16_t* def_levels, int16_t* rep_levels, T* values,
-      int64_t* values_read);
+  int64_t ReadBatch(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
+      T* values, int64_t* values_read);
 
   /// Read a batch of repetition levels, definition levels, and values from the
   /// column and leave spaces for null entries on the lowest level in the values
@@ -165,7 +165,7 @@
   ///   (i.e. definition_level == max_definition_level - 1)
   /// @param[out] null_count The number of nulls on the lowest levels.
   ///   (i.e. (values_read - null_count) is total number of non-null entries)
-  int64_t ReadBatchSpaced(int batch_size, int16_t* def_levels, int16_t* rep_levels,
+  int64_t ReadBatchSpaced(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
       T* values, uint8_t* valid_bits, int64_t valid_bits_offset, int64_t* levels_read,
       int64_t* values_read, int64_t* null_count);
 
@@ -217,8 +217,8 @@
 }
 
 template <typename DType>
-inline int64_t TypedColumnReader<DType>::ReadBatch(int batch_size, int16_t* def_levels,
-    int16_t* rep_levels, T* values, int64_t* values_read) {
+inline int64_t TypedColumnReader<DType>::ReadBatch(int64_t batch_size,
+    int16_t* def_levels, int16_t* rep_levels, T* values, int64_t* values_read) {
   // HasNext invokes ReadNewPage
   if (!HasNext()) {
     *values_read = 0;
@@ -257,7 +257,7 @@
 
   *values_read = ReadValues(values_to_read, values);
   int64_t total_values = std::max(num_def_levels, *values_read);
-  num_decoded_values_ += static_cast<int>(total_values);
+  num_decoded_values_ += total_values;
 
   return total_values;
 }
@@ -293,7 +293,7 @@
 }
 
 template <typename DType>
-inline int64_t TypedColumnReader<DType>::ReadBatchSpaced(int batch_size,
+inline int64_t TypedColumnReader<DType>::ReadBatchSpaced(int64_t batch_size,
     int16_t* def_levels, int16_t* rep_levels, T* values, uint8_t* valid_bits,
     int64_t valid_bits_offset, int64_t* levels_read, int64_t* values_read,
     int64_t* null_count_out) {
@@ -354,7 +354,7 @@
     *levels_read = total_values;
   }
 
-  num_decoded_values_ += static_cast<int>(*levels_read);
+  num_decoded_values_ += *levels_read;
   return total_values;
 }