// be/src/exec/parquet/parquet-bool-decoder.cc - impala - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.

 #include "exec/parquet/parquet-bool-decoder.h"

 #include "util/mem-util.h"

 #include "common/names.h"

 namespace impala {

 // Prepares the decoder for a new data page of 'size' bytes starting at 'data' and
 // encoded with 'encoding'. Only the decoder matching 'encoding' is reset; on success
 // the counters tracking already unpacked values are reset to zero as well.
 // Returns false if 'encoding' is neither PLAIN nor RLE, or if an RLE page is too
 // short to hold its 4-byte length prefix. Returns true otherwise.
 bool ParquetBoolDecoder::SetData(
     parquet::Encoding::type encoding, uint8_t* data, int size) {
   // Number of bytes that precede the RLE payload in the page.
   constexpr int RLE_LENGTH_PREFIX_BYTES = 4;
   encoding_ = encoding;
   // Only the relevant decoder is initialized for a given data page.
   switch (encoding) {
     case parquet::Encoding::PLAIN:
       bool_values_.Reset(data, size);
       break;
     case parquet::Encoding::RLE:
       // A truncated or corrupt page can be shorter than the prefix. Reject it here so
       // that the RLE decoder is never reset with a negative length.
       if (UNLIKELY(size < RLE_LENGTH_PREFIX_BYTES)) return false;
       // The first 4 bytes contain the size of the encoded data. This information is
       // redundant, as this is the last part of the data page, and the number of
       // remaining bytes is already known.
       rle_decoder_.Reset(
           data + RLE_LENGTH_PREFIX_BYTES, size - RLE_LENGTH_PREFIX_BYTES, 1);
       break;
     default:
       return false;
   }
   num_unpacked_values_ = 0;
   unpacked_value_idx_ = 0;
   return true;
 }

 // Runtime entry point for batch decoding: picks the template instantiation that
 // matches the encoding recorded in 'encoding_' and forwards all arguments to it.
 // The result of the selected instantiation is returned unchanged.
 bool ParquetBoolDecoder::DecodeValues(
     int64_t stride, int64_t count, bool* RESTRICT first_value) RESTRICT {
   if (encoding_ != parquet::Encoding::PLAIN) {
     // RLE is the only other encoding this function expects to see.
     DCHECK_EQ(encoding_, parquet::Encoding::RLE);
     return DecodeValues<parquet::Encoding::RLE>(stride, count, first_value);
   }
   return DecodeValues<parquet::Encoding::PLAIN>(stride, count, first_value);
 }

 // Decodes 'count' values with the decoder selected by ENCODING. Output slots are
 // obtained one at a time from a StrideWriter built from 'first_value' and 'stride'.
 // Stops and returns false at the first value that fails to decode; returns true
 // once all 'count' values have been written (including when 'count' <= 0).
 template <parquet::Encoding::type ENCODING>
 bool ParquetBoolDecoder::DecodeValues(
     int64_t stride, int64_t count, bool* RESTRICT first_value) RESTRICT {
   // TODO: we could optimise this further if needed by bypassing 'unpacked_values_'.
   StrideWriter<bool> writer(first_value, stride);
   int64_t values_left = count;
   while (values_left > 0) {
     if (UNLIKELY(!DecodeValue<ENCODING>(writer.Advance()))) return false;
     --values_left;
   }
   return true;
 }

 // Skips the next 'num_values' values of the current page; 'num_values' must be
 // positive. Values that are already unpacked in 'unpacked_values_' are consumed
 // first and the remainder is skipped in the decoder selected by 'encoding_'.
 // Returns true if all 'num_values' values were skipped, false otherwise.
 bool ParquetBoolDecoder::SkipValues(int num_values) {
   DCHECK_GT(num_values, 0);
   int skip_cached = min(num_unpacked_values_ - unpacked_value_idx_, num_values);
   unpacked_value_idx_ += skip_cached;
   if (skip_cached == num_values) return true;
   int num_remaining = num_values - skip_cached;
   if (encoding_ == parquet::Encoding::PLAIN) {
     // Whole groups of 32 values are skipped directly in the bit reader.
     int num_to_skip = BitUtil::RoundDownToPowerOf2(num_remaining, 32);
     // Do not ignore a failed skip: continuing would report success or unpack from
     // the wrong position when the page holds fewer values than requested.
     // NOTE(review): assumes SkipBatch() returns false on failure - confirm against
     // the bit reader's declaration.
     if (num_to_skip > 0 && UNLIKELY(!bool_values_.SkipBatch(1, num_to_skip))) {
       return false;
     }
     num_remaining -= num_to_skip;
     if (num_remaining > 0) {
       // The leftover values are skipped by unpacking a fresh batch into the cache
       // and starting to read after them.
       DCHECK_LE(num_remaining, UNPACKED_BUFFER_LEN);
       num_unpacked_values_ = bool_values_.UnpackBatch(1, UNPACKED_BUFFER_LEN,
           &unpacked_values_[0]);
       if (UNLIKELY(num_unpacked_values_ < num_remaining)) return false;
       unpacked_value_idx_ = num_remaining;
     }
     return true;
   } else {
     // rle_decoder_.SkipValues() might fill its internal buffer 'literal_buffer_'.
     // This can result in sub-optimal decoding later, because 'literal_buffer_' might
     // be used again and again, especially when reading a very long literal run.
     DCHECK_EQ(encoding_, parquet::Encoding::RLE);
     return rle_decoder_.SkipValues(num_remaining) == num_remaining;
   }
 }

 } // namespace impala
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.

	#include "exec/parquet/parquet-bool-decoder.h"

	#include "util/mem-util.h"

	#include "common/names.h"

	namespace impala {

	// Prepares the decoder for a new data page of 'size' bytes starting at 'data' and
	// encoded with 'encoding'. Only the decoder matching 'encoding' is reset; on success
	// the counters tracking already unpacked values are reset to zero as well.
	// Returns false if 'encoding' is neither PLAIN nor RLE, or if an RLE page is too
	// short to hold its 4-byte length prefix. Returns true otherwise.
	bool ParquetBoolDecoder::SetData(
	    parquet::Encoding::type encoding, uint8_t* data, int size) {
	  // Number of bytes that precede the RLE payload in the page.
	  constexpr int RLE_LENGTH_PREFIX_BYTES = 4;
	  encoding_ = encoding;
	  // Only the relevant decoder is initialized for a given data page.
	  switch (encoding) {
	    case parquet::Encoding::PLAIN:
	      bool_values_.Reset(data, size);
	      break;
	    case parquet::Encoding::RLE:
	      // A truncated or corrupt page can be shorter than the prefix. Reject it here so
	      // that the RLE decoder is never reset with a negative length.
	      if (UNLIKELY(size < RLE_LENGTH_PREFIX_BYTES)) return false;
	      // The first 4 bytes contain the size of the encoded data. This information is
	      // redundant, as this is the last part of the data page, and the number of
	      // remaining bytes is already known.
	      rle_decoder_.Reset(
	          data + RLE_LENGTH_PREFIX_BYTES, size - RLE_LENGTH_PREFIX_BYTES, 1);
	      break;
	    default:
	      return false;
	  }
	  num_unpacked_values_ = 0;
	  unpacked_value_idx_ = 0;
	  return true;
	}

	// Runtime entry point for batch decoding: picks the template instantiation that
	// matches the encoding recorded in 'encoding_' and forwards all arguments to it.
	// The result of the selected instantiation is returned unchanged.
	bool ParquetBoolDecoder::DecodeValues(
	    int64_t stride, int64_t count, bool* RESTRICT first_value) RESTRICT {
	  if (encoding_ != parquet::Encoding::PLAIN) {
	    // RLE is the only other encoding this function expects to see.
	    DCHECK_EQ(encoding_, parquet::Encoding::RLE);
	    return DecodeValues<parquet::Encoding::RLE>(stride, count, first_value);
	  }
	  return DecodeValues<parquet::Encoding::PLAIN>(stride, count, first_value);
	}

	// Decodes 'count' values with the decoder selected by ENCODING. Output slots are
	// obtained one at a time from a StrideWriter built from 'first_value' and 'stride'.
	// Stops and returns false at the first value that fails to decode; returns true
	// once all 'count' values have been written (including when 'count' <= 0).
	template <parquet::Encoding::type ENCODING>
	bool ParquetBoolDecoder::DecodeValues(
	    int64_t stride, int64_t count, bool* RESTRICT first_value) RESTRICT {
	  // TODO: we could optimise this further if needed by bypassing 'unpacked_values_'.
	  StrideWriter<bool> writer(first_value, stride);
	  int64_t values_left = count;
	  while (values_left > 0) {
	    if (UNLIKELY(!DecodeValue<ENCODING>(writer.Advance()))) return false;
	    --values_left;
	  }
	  return true;
	}

	// Skips the next 'num_values' values of the current page; 'num_values' must be
	// positive. Values that are already unpacked in 'unpacked_values_' are consumed
	// first and the remainder is skipped in the decoder selected by 'encoding_'.
	// Returns true if all 'num_values' values were skipped, false otherwise.
	bool ParquetBoolDecoder::SkipValues(int num_values) {
	  DCHECK_GT(num_values, 0);
	  int skip_cached = min(num_unpacked_values_ - unpacked_value_idx_, num_values);
	  unpacked_value_idx_ += skip_cached;
	  if (skip_cached == num_values) return true;
	  int num_remaining = num_values - skip_cached;
	  if (encoding_ == parquet::Encoding::PLAIN) {
	    // Whole groups of 32 values are skipped directly in the bit reader.
	    int num_to_skip = BitUtil::RoundDownToPowerOf2(num_remaining, 32);
	    // Do not ignore a failed skip: continuing would report success or unpack from
	    // the wrong position when the page holds fewer values than requested.
	    // NOTE(review): assumes SkipBatch() returns false on failure - confirm against
	    // the bit reader's declaration.
	    if (num_to_skip > 0 && UNLIKELY(!bool_values_.SkipBatch(1, num_to_skip))) {
	      return false;
	    }
	    num_remaining -= num_to_skip;
	    if (num_remaining > 0) {
	      // The leftover values are skipped by unpacking a fresh batch into the cache
	      // and starting to read after them.
	      DCHECK_LE(num_remaining, UNPACKED_BUFFER_LEN);
	      num_unpacked_values_ = bool_values_.UnpackBatch(1, UNPACKED_BUFFER_LEN,
	          &unpacked_values_[0]);
	      if (UNLIKELY(num_unpacked_values_ < num_remaining)) return false;
	      unpacked_value_idx_ = num_remaining;
	    }
	    return true;
	  } else {
	    // rle_decoder_.SkipValues() might fill its internal buffer 'literal_buffer_'.
	    // This can result in sub-optimal decoding later, because 'literal_buffer_' might
	    // be used again and again, especially when reading a very long literal run.
	    DCHECK_EQ(encoding_, parquet::Encoding::RLE);
	    return rle_decoder_.SkipValues(num_remaining) == num_remaining;
	  }
	}

	} // namespace impala