PARQUET-1008: [C++] TypedColumnReader::ReadBatch method updated to accept batch_size param of int64_t type
Author: Max Risuhin <risuhin.max@gmail.com>
Closes #349 from MaxRis/PARQUET-1008 and squashes the following commits:
9e0db07 [Max Risuhin] PARQUET-1008: [C++] TypedColumnReader::ReadBatch method updated to accept batch_size param of int64_t type
diff --git a/src/parquet/column/reader.cc b/src/parquet/column/reader.cc
index fe2de57..bc4e4a0 100644
--- a/src/parquet/column/reader.cc
+++ b/src/parquet/column/reader.cc
@@ -119,9 +119,9 @@
// Levels are encoded as rle or bit-packed.
// Init repetition levels
if (descr_->max_repetition_level() > 0) {
- int64_t rep_levels_bytes =
- repetition_level_decoder_.SetData(page->repetition_level_encoding(),
- descr_->max_repetition_level(), num_buffered_values_, buffer);
+ int64_t rep_levels_bytes = repetition_level_decoder_.SetData(
+ page->repetition_level_encoding(), descr_->max_repetition_level(),
+ static_cast<int>(num_buffered_values_), buffer);
buffer += rep_levels_bytes;
data_size -= rep_levels_bytes;
}
@@ -130,9 +130,9 @@
// Init definition levels
if (descr_->max_definition_level() > 0) {
- int64_t def_levels_bytes =
- definition_level_decoder_.SetData(page->definition_level_encoding(),
- descr_->max_definition_level(), num_buffered_values_, buffer);
+ int64_t def_levels_bytes = definition_level_decoder_.SetData(
+ page->definition_level_encoding(), descr_->max_definition_level(),
+ static_cast<int>(num_buffered_values_), buffer);
buffer += def_levels_bytes;
data_size -= def_levels_bytes;
}
@@ -170,7 +170,7 @@
}
}
current_decoder_->SetData(
- num_buffered_values_, buffer, static_cast<int>(data_size));
+ static_cast<int>(num_buffered_values_), buffer, static_cast<int>(data_size));
return true;
} else {
// We don't know what this page type is. We're allowed to skip non-data
diff --git a/src/parquet/column/reader.h b/src/parquet/column/reader.h
index 80084b2..f36db5e 100644
--- a/src/parquet/column/reader.h
+++ b/src/parquet/column/reader.h
@@ -91,11 +91,11 @@
// values. For repeated or optional values, there may be fewer data values
// than levels, and this tells you how many encoded levels there are in that
// case.
- int num_buffered_values_;
+ int64_t num_buffered_values_;
// The number of values from the current data page that have been decoded
// into memory
- int num_decoded_values_;
+ int64_t num_decoded_values_;
::arrow::MemoryPool* pool_;
};
@@ -128,8 +128,8 @@
// This API is the same for both V1 and V2 of the DataPage
//
// @returns: actual number of levels read (see values_read for number of values read)
- int64_t ReadBatch(int batch_size, int16_t* def_levels, int16_t* rep_levels, T* values,
- int64_t* values_read);
+ int64_t ReadBatch(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
+ T* values, int64_t* values_read);
/// Read a batch of repetition levels, definition levels, and values from the
/// column and leave spaces for null entries on the lowest level in the values
@@ -165,7 +165,7 @@
/// (i.e. definition_level == max_definition_level - 1)
/// @param[out] null_count The number of nulls on the lowest levels.
/// (i.e. (values_read - null_count) is total number of non-null entries)
- int64_t ReadBatchSpaced(int batch_size, int16_t* def_levels, int16_t* rep_levels,
+ int64_t ReadBatchSpaced(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels,
T* values, uint8_t* valid_bits, int64_t valid_bits_offset, int64_t* levels_read,
int64_t* values_read, int64_t* null_count);
@@ -217,8 +217,8 @@
}
template <typename DType>
-inline int64_t TypedColumnReader<DType>::ReadBatch(int batch_size, int16_t* def_levels,
- int16_t* rep_levels, T* values, int64_t* values_read) {
+inline int64_t TypedColumnReader<DType>::ReadBatch(int64_t batch_size,
+ int16_t* def_levels, int16_t* rep_levels, T* values, int64_t* values_read) {
// HasNext invokes ReadNewPage
if (!HasNext()) {
*values_read = 0;
@@ -257,7 +257,7 @@
*values_read = ReadValues(values_to_read, values);
int64_t total_values = std::max(num_def_levels, *values_read);
- num_decoded_values_ += static_cast<int>(total_values);
+ num_decoded_values_ += total_values;
return total_values;
}
@@ -293,7 +293,7 @@
}
template <typename DType>
-inline int64_t TypedColumnReader<DType>::ReadBatchSpaced(int batch_size,
+inline int64_t TypedColumnReader<DType>::ReadBatchSpaced(int64_t batch_size,
int16_t* def_levels, int16_t* rep_levels, T* values, uint8_t* valid_bits,
int64_t valid_bits_offset, int64_t* levels_read, int64_t* values_read,
int64_t* null_count_out) {
@@ -354,7 +354,7 @@
*levels_read = total_values;
}
- num_decoded_values_ += static_cast<int>(*levels_read);
+ num_decoded_values_ += *levels_read;
return total_values;
}