// be/src/exec/parquet-common.h - impala - Git at Google

 // Licensed to the Apache Software Foundation (ASF) under one
 // or more contributor license agreements.  See the NOTICE file
 // distributed with this work for additional information
 // regarding copyright ownership.  The ASF licenses this file
 // to you under the Apache License, Version 2.0 (the
 // "License"); you may not use this file except in compliance
 // with the License.  You may obtain a copy of the License at
 //
 //   http://www.apache.org/licenses/LICENSE-2.0
 //
 // Unless required by applicable law or agreed to in writing,
 // software distributed under the License is distributed on an
 // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 // KIND, either express or implied.  See the License for the
 // specific language governing permissions and limitations
 // under the License.


 #ifndef IMPALA_EXEC_PARQUET_COMMON_H
 #define IMPALA_EXEC_PARQUET_COMMON_H

 #include "common/compiler-util.h"
 #include "gen-cpp/Descriptors_types.h"
 #include "gen-cpp/parquet_types.h"
 #include "runtime/decimal-value.h"
 #include "runtime/string-value.h"
 #include "util/bit-util.h"
 #include "util/decimal-util.h"

 /// This file contains common elements between the parquet Writer and Scanner.
 namespace impala {

 /// Forward declaration. Within this header TimestampValue is only named as a reference
 /// parameter (see the ByteSize specialization further down), so its definition is not
 /// required here.
 class TimestampValue;

 /// The four ASCII bytes 'P', 'A', 'R', '1'.
 /// NOTE(review): presumably the magic number that marks a Parquet file -- confirm
 /// against the Parquet format specification.
 const uint8_t PARQUET_VERSION_NUMBER[4] = {'P', 'A', 'R', '1'};
 /// NOTE(review): presumably the format version the writer records in the file
 /// metadata -- confirm against the users of this constant.
 const uint32_t PARQUET_CURRENT_VERSION = 1;

 /// Mapping of impala types to parquet storage types.  This is indexed by
 /// PrimitiveType enum, so the table is positional: entry i is the storage type for the
 /// PrimitiveType whose value is i, and the order of the rows must track the declaration
 /// order of that enum (which is defined outside this file).
 /// Rows marked NYI are types flagged as not implemented (NYI presumably stands for
 /// "not yet implemented"); they still need a row so later entries keep their index.
 const parquet::Type::type IMPALA_TO_PARQUET_TYPES[] = {
   parquet::Type::BOOLEAN,     // Invalid
   parquet::Type::BOOLEAN,     // NULL type
   parquet::Type::BOOLEAN,
   parquet::Type::INT32,
   parquet::Type::INT32,
   parquet::Type::INT32,
   parquet::Type::INT64,
   parquet::Type::FLOAT,
   parquet::Type::DOUBLE,
   parquet::Type::INT96,       // Timestamp
   parquet::Type::BYTE_ARRAY,  // String
   parquet::Type::BYTE_ARRAY,  // Date, NYI
   parquet::Type::BYTE_ARRAY,  // DateTime, NYI
   parquet::Type::BYTE_ARRAY,  // Binary NYI
   parquet::Type::FIXED_LEN_BYTE_ARRAY, // Decimal
   parquet::Type::BYTE_ARRAY,  // VARCHAR(N)
   parquet::Type::BYTE_ARRAY,  // CHAR(N)
 };

 /// Mapping of Parquet codec enums to Impala enums. The table is positional.
 /// NOTE(review): presumably indexed by the parquet::CompressionCodec value read from
 /// the file. Only four entries exist and nothing in this file bounds-checks the
 /// index, so callers must validate the codec before the lookup -- confirm they do.
 const THdfsCompression::type PARQUET_TO_IMPALA_CODEC[] = {
   THdfsCompression::NONE,
   THdfsCompression::SNAPPY,
   THdfsCompression::GZIP,
   THdfsCompression::LZO
 };

 /// Mapping of Impala codec enums to Parquet enums. The table is positional; the
 /// trailing comments name the Impala codec a row stands for where the Parquet codec
 /// differs from it or could be ambiguous.
 /// NOTE(review): presumably indexed by THdfsCompression::type. That enum is defined
 /// outside this file, so confirm that the number and order of the seven rows still
 /// match it.
 const parquet::CompressionCodec::type IMPALA_TO_PARQUET_CODEC[] = {
   parquet::CompressionCodec::UNCOMPRESSED,
   parquet::CompressionCodec::SNAPPY,  // DEFAULT
   parquet::CompressionCodec::GZIP,    // GZIP
   parquet::CompressionCodec::GZIP,    // DEFLATE
   parquet::CompressionCodec::SNAPPY,
   parquet::CompressionCodec::SNAPPY,  // SNAPPY_BLOCKED
   parquet::CompressionCodec::LZO,
 };

 /// Stateless helpers for Parquet's plain encoding; every member is a static function.
 /// TODO: the interface is templated over the value type so that per-value calls can be
 /// inlined rather than dispatched virtually. That costs readability and should be
 /// revisited once codegen is able to inline virtual calls.
 class ParquetPlainEncoder {
  public:
   /// Returns how many bytes 'v' occupies when plain encoded. This generic version
   /// reports the in-memory size of T; types that are encoded differently are
   /// specialized after the class.
   template <typename T>
   static int ByteSize(const T& v) { return sizeof(T); }

   /// Returns the plain-encoded size in bytes of values of type 't', or -1 when the
   /// type is variable length. The result may differ from the slot size of the type.
   static int EncodedByteSize(const ColumnType& t) {
     int encoded_size = -1;
     switch (t.type) {
       case TYPE_STRING:
       case TYPE_VARCHAR:
       case TYPE_CHAR:
         // Variable length. CHAR counts as well because its padding is not written
         // to the file.
         break;
       case TYPE_TINYINT:
       case TYPE_SMALLINT:
       case TYPE_INT:
       case TYPE_FLOAT:
         encoded_size = 4;
         break;
       case TYPE_BIGINT:
       case TYPE_DOUBLE:
         encoded_size = 8;
         break;
       case TYPE_TIMESTAMP:
         encoded_size = 12;
         break;
       case TYPE_DECIMAL:
         encoded_size = DecimalSize(t);
         break;
       case TYPE_NULL:
       case TYPE_BOOLEAN: // These types are not plain encoded.
       default:
         DCHECK(false);
         break;
     }
     return encoded_size;
   }

   /// Returns the smallest number of bytes able to hold any decimal of precision
   /// t.precision (1 through 38). Any other precision trips a DCHECK and yields -1.
   static int DecimalSize(const ColumnType& t) {
     DCHECK(t.type == TYPE_DECIMAL);
     // BYTES_BY_PRECISION[p] is the width used for precision p. The comment on each
     // row is the largest positive value representable in that many bytes (the most
     // negative one is -(X + 1)).
     // TODO: use closed form for this?
     static const int BYTES_BY_PRECISION[] = {
       -1,          // placeholder: precision 0 has no mapping
       1, 1,        // 127
       2, 2,        // 32,767
       3, 3,        // 8,388,607
       4, 4, 4,     // 2,147,483,647
       5, 5,        // 549,755,813,887
       6, 6, 6,     // 140,737,488,355,327
       7, 7,        // 36,028,797,018,963,967
       8, 8,        // 9,223,372,036,854,775,807
       9, 9, 9,     // 2,361,183,241,434,822,606,847
       10, 10,      // 604,462,909,807,314,587,353,087
       11, 11, 11,  // 154,742,504,910,672,534,362,390,527
       12, 12,      // 39,614,081,257,132,168,796,771,975,167
       13, 13, 13,  // 10,141,204,801,825,835,211,973,625,643,007
       14, 14,      // 2,596,148,429,267,413,814,265,248,164,610,047
       15, 15,      // 664,613,997,892,457,936,451,903,530,140,172,287
       16, 16, 16,  // 170,141,183,460,469,231,731,687,303,715,884,105,727
     };
     const int max_precision =
         sizeof(BYTES_BY_PRECISION) / sizeof(BYTES_BY_PRECISION[0]) - 1;
     if (t.precision < 1 || t.precision > max_precision) {
       DCHECK(false);
       return -1;
     }
     return BYTES_BY_PRECISION[t.precision];
   }

   /// Writes the plain encoding of 't' to 'buffer' and returns the number of bytes
   /// written. 'buffer' must already be large enough; it does not have to be aligned.
   /// 'fixed_len_size' only matters for values stored as FIXED_LEN_BYTE_ARRAY, where it
   /// is the number of bytes to emit; this generic version ignores it.
   template <typename T>
   static int Encode(const T& t, int fixed_len_size, uint8_t* buffer) {
     const int num_bytes = ByteSize(t);
     memcpy(buffer, &t, num_bytes);
     return num_bytes;
   }

   /// Reads one value from 'buffer' into '*v' without touching 'buffer_end' or anything
   /// past it. 'buffer' does not have to be aligned. 'fixed_len_size' is the value size
   /// for FIXED_LEN_BYTE_ARRAY types and is ignored by this generic version.
   /// Returns the number of bytes consumed, or -1 if the value could not be decoded.
   template <typename T>
   static int Decode(const uint8_t* buffer, const uint8_t* buffer_end, int fixed_len_size,
       T* v) {
     const int num_bytes = ByteSize(*v);
     const bool truncated = buffer_end - buffer < num_bytes;
     if (UNLIKELY(truncated)) return -1;
     memcpy(v, buffer, num_bytes);
     return num_bytes;
   }
 };

 /// Calling this with arguments of type ColumnType is certainly a programmer error, so we
 /// disallow it: the specialization is declared but given no definition in this file, so
 /// the generic sizeof(T) version is never instantiated for a ColumnType.
 /// NOTE(review): presumably no other translation unit defines it either, which would
 /// turn any use into a link-time error -- confirm.
 template <> int ParquetPlainEncoder::ByteSize(const ColumnType& t);

 /// Disable for bools. Plain encoding is not used for booleans. These are likewise
 /// declarations only; no definitions appear in this file.
 template <> int ParquetPlainEncoder::ByteSize(const bool& b);
 template <> int ParquetPlainEncoder::Encode(const bool&, int fixed_len_size, uint8_t*);
 template <> int ParquetPlainEncoder::Decode(const uint8_t*, const uint8_t*,
     int fixed_len_size, bool* v);

 /// Not used for decimals since the plain encoding encodes them using
 /// FIXED_LEN_BYTE_ARRAY; their encoded width is the 'fixed_len_size' argument of
 /// Encode()/Decode() rather than anything derived from the value. Reaching one of
 /// these specializations is a programming error: each trips a DCHECK and returns -1.
 template <>
 inline int ParquetPlainEncoder::ByteSize(const Decimal4Value&) {
   DCHECK(false);
   return -1;
 }
 template <>
 inline int ParquetPlainEncoder::ByteSize(const Decimal8Value&) {
   DCHECK(false);
   return -1;
 }
 template <>
 inline int ParquetPlainEncoder::ByteSize(const Decimal16Value&) {
   DCHECK(false);
   return -1;
 }

 /// Parquet has no 8-bit or 16-bit integer storage, so both widths are reported as
 /// occupying a full 32-bit integer.
 template <>
 inline int ParquetPlainEncoder::ByteSize(const int8_t& v) {
   return static_cast<int>(sizeof(int32_t));
 }
 template <>
 inline int ParquetPlainEncoder::ByteSize(const int16_t& v) {
   return static_cast<int>(sizeof(int32_t));
 }

 /// A string is encoded as a 4-byte length prefix followed by 'v.len' bytes of data.
 /// NOTE: the sum is narrowed to int, so it does not survive a 'v.len' within four of
 /// the largest int; callers handling untrusted lengths must check for that themselves.
 template <>
 inline int ParquetPlainEncoder::ByteSize(const StringValue& v) {
   return static_cast<int>(sizeof(int32_t) + v.len);
 }

 /// Timestamps map to the Parquet INT96 storage type: 96 bits, i.e. 12 bytes.
 template <>
 inline int ParquetPlainEncoder::ByteSize(const TimestampValue& v) {
   const int int96_bits = 96;
   return int96_bits / 8;
 }

 /// Decodes an 8-bit value. The encoded form is as wide as ByteSize() reports (a 32-bit
 /// integer), so that many bytes must be available and are reported as consumed, but
 /// only the first encoded byte is kept. 'fixed_len_size' is unused.
 /// Returns the number of bytes consumed, or -1 if too few bytes remain.
 /// NOTE(review): the first byte is the low-order byte only for a little-endian
 /// encoding -- confirm that is what the file format guarantees.
 template <>
 inline int ParquetPlainEncoder::Decode(const uint8_t* buffer, const uint8_t* buffer_end,
     int fixed_len_size, int8_t* v) {
   const int encoded_size = ByteSize(*v);
   const bool truncated = buffer_end - buffer < encoded_size;
   if (UNLIKELY(truncated)) return -1;
   *v = buffer[0];
   return encoded_size;
 }
 /// Decodes a 16-bit value. The encoded form is as wide as ByteSize() reports (a 32-bit
 /// integer), so that many bytes must be available and are reported as consumed, but
 /// only the first two encoded bytes are copied into '*v'. 'fixed_len_size' is unused.
 /// Returns the number of bytes consumed, or -1 if too few bytes remain.
 /// NOTE(review): the first two bytes are the low-order half only for a little-endian
 /// encoding on a little-endian host -- confirm.
 template <>
 inline int ParquetPlainEncoder::Decode(const uint8_t* buffer, const uint8_t* buffer_end,
     int fixed_len_size, int16_t* v) {
   const int encoded_size = ByteSize(*v);
   const bool truncated = buffer_end - buffer < encoded_size;
   if (UNLIKELY(truncated)) return -1;
   memcpy(v, buffer, sizeof(*v));
   return encoded_size;
 }

 /// Encodes an 8-bit value by widening it to a 32-bit integer and copying those four
 /// bytes into 'buffer'. 'fixed_len_size' is unused. Returns the bytes written.
 template <>
 inline int ParquetPlainEncoder::Encode(
     const int8_t& v, int fixed_len_size, uint8_t* buffer) {
   const int32_t widened = v;
   memcpy(buffer, &widened, sizeof(widened));
   return ByteSize(v);
 }

 /// Encodes a 16-bit value by widening it to a 32-bit integer and copying those four
 /// bytes into 'buffer'. 'fixed_len_size' is unused. Returns the bytes written.
 template <>
 inline int ParquetPlainEncoder::Encode(
     const int16_t& v, int fixed_len_size, uint8_t* buffer) {
   const int32_t widened = v;
   memcpy(buffer, &widened, sizeof(widened));
   return ByteSize(v);
 }

 /// Encodes a string as its 4-byte length followed by 'v.len' bytes copied from 'v.ptr'.
 /// 'fixed_len_size' is unused. Returns the bytes written, as reported by ByteSize().
 /// 'buffer' must have room for the prefix plus the data.
 template <>
 inline int ParquetPlainEncoder::Encode(
     const StringValue& v, int fixed_len_size, uint8_t* buffer) {
   uint8_t* const payload = buffer + sizeof(int32_t);
   memcpy(buffer, &v.len, sizeof(int32_t));
   memcpy(payload, v.ptr, v.len);
   return ByteSize(v);
 }

 /// Decodes a length-prefixed string: a 4-byte length followed by that many bytes of
 /// data. 'v->ptr' is pointed at the data inside 'buffer' (nothing is copied), so the
 /// result is only usable while 'buffer' is.
 /// If 'fixed_len_size' is positive the reported length is capped at that value; the
 /// return value still covers the whole encoded string.
 /// Returns the number of bytes consumed, or -1 if the value runs past 'buffer_end',
 /// the length prefix is negative, or the consumed size does not fit in an int.
 /// 'v->len' is overwritten with the raw prefix even when -1 is returned for a bad
 /// length.
 template <>
 inline int ParquetPlainEncoder::Decode(const uint8_t* buffer, const uint8_t* buffer_end,
     int fixed_len_size, StringValue* v) {
   // All size arithmetic is done in signed 64 bits. Comparing the pointer difference
   // with the unsigned sizeof() would turn a negative difference into a huge positive
   // one, and 4 + len computed in int wraps negative for a length prefix close to the
   // largest int, which would let a corrupt length slip past the bounds check.
   const int64_t bytes_available = buffer_end - buffer;
   const int64_t prefix_size = sizeof(int32_t);
   if (UNLIKELY(bytes_available < prefix_size)) return -1;
   memcpy(&v->len, buffer, sizeof(int32_t));
   if (UNLIKELY(v->len < 0)) return -1;
   const int64_t byte_size = prefix_size + v->len;
   if (UNLIKELY(bytes_available < byte_size)) return -1;
   // The interface returns int; refuse a size that would not survive the narrowing.
   const int bytes_read = static_cast<int>(byte_size);
   if (UNLIKELY(bytes_read != byte_size)) return -1;
   v->ptr = reinterpret_cast<char*>(const_cast<uint8_t*>(buffer)) + sizeof(int32_t);
   if (fixed_len_size > 0) v->len = std::min(v->len, fixed_len_size);
   // we still read byte_size bytes, even if we truncate
   return bytes_read;
 }

 /// Write decimals as big endian (byte comparable) to benefit from common prefixes.
 /// fixed_len_size can be less than sizeof(Decimal*Value) for space savings. This means
 /// that the dropped leading bytes of the in-memory value are pure sign extension:
 /// zeros, or all ones for negative values.
 /// For example, precision 2 fits in 1 byte. All decimals stored as Decimal4Value
 /// will have 3 bytes of leading zeros, we will only store the interesting byte.
 /// Each overload hands the conversion to DecimalUtil::EncodeToFixedLenByteArray and
 /// returns 'fixed_len_size' as the number of bytes written.
 template <>
 inline int ParquetPlainEncoder::Encode(
     const Decimal4Value& v, int fixed_len_size, uint8_t* buffer) {
   DecimalUtil::EncodeToFixedLenByteArray(buffer, fixed_len_size, v);
   return fixed_len_size;
 }

 template <>
 inline int ParquetPlainEncoder::Encode(
     const Decimal8Value& v, int fixed_len_size, uint8_t* buffer) {
   DecimalUtil::EncodeToFixedLenByteArray(buffer, fixed_len_size, v);
   return fixed_len_size;
 }

 template <>
 inline int ParquetPlainEncoder::Encode(
     const Decimal16Value& v, int fixed_len_size, uint8_t* buffer) {
   DecimalUtil::EncodeToFixedLenByteArray(buffer, fixed_len_size, v);
   return fixed_len_size;
 }

 /// Decodes a decimal stored as a FIXED_LEN_BYTE_ARRAY of 'fixed_len_size' bytes.
 /// Returns 'fixed_len_size', or -1 when fewer than that many bytes remain before
 /// 'buffer_end'. The byte-level conversion is delegated to DecimalUtil.
 /// NOTE(review): 'fixed_len_size' is not checked here against zero or sizeof(*v);
 /// presumably the caller or DecimalUtil guarantees a sane value -- confirm.
 template <>
 inline int ParquetPlainEncoder::Decode(const uint8_t* buffer, const uint8_t* buffer_end,
     int fixed_len_size, Decimal4Value* v) {
   const bool truncated = buffer_end - buffer < fixed_len_size;
   if (UNLIKELY(truncated)) return -1;
   DecimalUtil::DecodeFromFixedLenByteArray(buffer, fixed_len_size, v);
   return fixed_len_size;
 }

 /// Same contract as the Decimal4Value overload, for Decimal8Value.
 template <>
 inline int ParquetPlainEncoder::Decode(const uint8_t* buffer, const uint8_t* buffer_end,
     int fixed_len_size, Decimal8Value* v) {
   const bool truncated = buffer_end - buffer < fixed_len_size;
   if (UNLIKELY(truncated)) return -1;
   DecimalUtil::DecodeFromFixedLenByteArray(buffer, fixed_len_size, v);
   return fixed_len_size;
 }

 /// Same contract as the Decimal4Value overload, for Decimal16Value.
 template <>
 inline int ParquetPlainEncoder::Decode(const uint8_t* buffer, const uint8_t* buffer_end,
     int fixed_len_size, Decimal16Value* v) {
   const bool truncated = buffer_end - buffer < fixed_len_size;
   if (UNLIKELY(truncated)) return -1;
   DecimalUtil::DecodeFromFixedLenByteArray(buffer, fixed_len_size, v);
   return fixed_len_size;
 }

 }
 #endif
	// Licensed to the Apache Software Foundation (ASF) under one
	// or more contributor license agreements. See the NOTICE file
	// distributed with this work for additional information
	// regarding copyright ownership. The ASF licenses this file
	// to you under the Apache License, Version 2.0 (the
	// "License"); you may not use this file except in compliance
	// with the License. You may obtain a copy of the License at
	//
	// http://www.apache.org/licenses/LICENSE-2.0
	//
	// Unless required by applicable law or agreed to in writing,
	// software distributed under the License is distributed on an
	// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	// KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations
	// under the License.


	#ifndef IMPALA_EXEC_PARQUET_COMMON_H
	#define IMPALA_EXEC_PARQUET_COMMON_H

	#include "common/compiler-util.h"
	#include "gen-cpp/Descriptors_types.h"
	#include "gen-cpp/parquet_types.h"
	#include "runtime/decimal-value.h"
	#include "runtime/string-value.h"
	#include "util/bit-util.h"
	#include "util/decimal-util.h"

	/// This file contains common elements between the parquet Writer and Scanner.
	namespace impala {

	/// Forward declaration. Within this header TimestampValue is only named as a reference
	/// parameter (see the ByteSize specialization further down), so its definition is not
	/// required here.
	class TimestampValue;

	/// The four ASCII bytes 'P', 'A', 'R', '1'.
	/// NOTE(review): presumably the magic number that marks a Parquet file -- confirm
	/// against the Parquet format specification.
	const uint8_t PARQUET_VERSION_NUMBER[4] = {'P', 'A', 'R', '1'};
	/// NOTE(review): presumably the format version the writer records in the file
	/// metadata -- confirm against the users of this constant.
	const uint32_t PARQUET_CURRENT_VERSION = 1;

	/// Mapping of impala types to parquet storage types. This is indexed by
	/// PrimitiveType enum, so the table is positional: entry i is the storage type for the
	/// PrimitiveType whose value is i, and the order of the rows must track the declaration
	/// order of that enum (which is defined outside this file).
	/// Rows marked NYI are types flagged as not implemented (NYI presumably stands for
	/// "not yet implemented"); they still need a row so later entries keep their index.
	const parquet::Type::type IMPALA_TO_PARQUET_TYPES[] = {
	parquet::Type::BOOLEAN, // Invalid
	parquet::Type::BOOLEAN, // NULL type
	parquet::Type::BOOLEAN,
	parquet::Type::INT32,
	parquet::Type::INT32,
	parquet::Type::INT32,
	parquet::Type::INT64,
	parquet::Type::FLOAT,
	parquet::Type::DOUBLE,
	parquet::Type::INT96, // Timestamp
	parquet::Type::BYTE_ARRAY, // String
	parquet::Type::BYTE_ARRAY, // Date, NYI
	parquet::Type::BYTE_ARRAY, // DateTime, NYI
	parquet::Type::BYTE_ARRAY, // Binary NYI
	parquet::Type::FIXED_LEN_BYTE_ARRAY, // Decimal
	parquet::Type::BYTE_ARRAY, // VARCHAR(N)
	parquet::Type::BYTE_ARRAY, // CHAR(N)
	};

	/// Mapping of Parquet codec enums to Impala enums. The table is positional.
	/// NOTE(review): presumably indexed by the parquet::CompressionCodec value read from
	/// the file. Only four entries exist and nothing in this file bounds-checks the
	/// index, so callers must validate the codec before the lookup -- confirm they do.
	const THdfsCompression::type PARQUET_TO_IMPALA_CODEC[] = {
	THdfsCompression::NONE,
	THdfsCompression::SNAPPY,
	THdfsCompression::GZIP,
	THdfsCompression::LZO
	};

	/// Mapping of Impala codec enums to Parquet enums. The table is positional; the
	/// trailing comments name the Impala codec a row stands for where the Parquet codec
	/// differs from it or could be ambiguous.
	/// NOTE(review): presumably indexed by THdfsCompression::type. That enum is defined
	/// outside this file, so confirm that the number and order of the seven rows still
	/// match it.
	const parquet::CompressionCodec::type IMPALA_TO_PARQUET_CODEC[] = {
	parquet::CompressionCodec::UNCOMPRESSED,
	parquet::CompressionCodec::SNAPPY, // DEFAULT
	parquet::CompressionCodec::GZIP, // GZIP
	parquet::CompressionCodec::GZIP, // DEFLATE
	parquet::CompressionCodec::SNAPPY,
	parquet::CompressionCodec::SNAPPY, // SNAPPY_BLOCKED
	parquet::CompressionCodec::LZO,
	};

	/// Stateless helpers for Parquet's plain encoding; every member is a static function.
	/// TODO: the interface is templated over the value type so that per-value calls can be
	/// inlined rather than dispatched virtually. That costs readability and should be
	/// revisited once codegen is able to inline virtual calls.
	class ParquetPlainEncoder {
	 public:
	  /// Returns how many bytes 'v' occupies when plain encoded. This generic version
	  /// reports the in-memory size of T; types that are encoded differently are
	  /// specialized after the class.
	  template <typename T>
	  static int ByteSize(const T& v) { return sizeof(T); }

	  /// Returns the plain-encoded size in bytes of values of type 't', or -1 when the
	  /// type is variable length. The result may differ from the slot size of the type.
	  static int EncodedByteSize(const ColumnType& t) {
	    int encoded_size = -1;
	    switch (t.type) {
	      case TYPE_STRING:
	      case TYPE_VARCHAR:
	      case TYPE_CHAR:
	        // Variable length. CHAR counts as well because its padding is not written
	        // to the file.
	        break;
	      case TYPE_TINYINT:
	      case TYPE_SMALLINT:
	      case TYPE_INT:
	      case TYPE_FLOAT:
	        encoded_size = 4;
	        break;
	      case TYPE_BIGINT:
	      case TYPE_DOUBLE:
	        encoded_size = 8;
	        break;
	      case TYPE_TIMESTAMP:
	        encoded_size = 12;
	        break;
	      case TYPE_DECIMAL:
	        encoded_size = DecimalSize(t);
	        break;
	      case TYPE_NULL:
	      case TYPE_BOOLEAN: // These types are not plain encoded.
	      default:
	        DCHECK(false);
	        break;
	    }
	    return encoded_size;
	  }

	  /// Returns the smallest number of bytes able to hold any decimal of precision
	  /// t.precision (1 through 38). Any other precision trips a DCHECK and yields -1.
	  static int DecimalSize(const ColumnType& t) {
	    DCHECK(t.type == TYPE_DECIMAL);
	    // BYTES_BY_PRECISION[p] is the width used for precision p. The comment on each
	    // row is the largest positive value representable in that many bytes (the most
	    // negative one is -(X + 1)).
	    // TODO: use closed form for this?
	    static const int BYTES_BY_PRECISION[] = {
	      -1,          // placeholder: precision 0 has no mapping
	      1, 1,        // 127
	      2, 2,        // 32,767
	      3, 3,        // 8,388,607
	      4, 4, 4,     // 2,147,483,647
	      5, 5,        // 549,755,813,887
	      6, 6, 6,     // 140,737,488,355,327
	      7, 7,        // 36,028,797,018,963,967
	      8, 8,        // 9,223,372,036,854,775,807
	      9, 9, 9,     // 2,361,183,241,434,822,606,847
	      10, 10,      // 604,462,909,807,314,587,353,087
	      11, 11, 11,  // 154,742,504,910,672,534,362,390,527
	      12, 12,      // 39,614,081,257,132,168,796,771,975,167
	      13, 13, 13,  // 10,141,204,801,825,835,211,973,625,643,007
	      14, 14,      // 2,596,148,429,267,413,814,265,248,164,610,047
	      15, 15,      // 664,613,997,892,457,936,451,903,530,140,172,287
	      16, 16, 16,  // 170,141,183,460,469,231,731,687,303,715,884,105,727
	    };
	    const int max_precision =
	        sizeof(BYTES_BY_PRECISION) / sizeof(BYTES_BY_PRECISION[0]) - 1;
	    if (t.precision < 1 || t.precision > max_precision) {
	      DCHECK(false);
	      return -1;
	    }
	    return BYTES_BY_PRECISION[t.precision];
	  }

	  /// Writes the plain encoding of 't' to 'buffer' and returns the number of bytes
	  /// written. 'buffer' must already be large enough; it does not have to be aligned.
	  /// 'fixed_len_size' only matters for values stored as FIXED_LEN_BYTE_ARRAY, where it
	  /// is the number of bytes to emit; this generic version ignores it.
	  template <typename T>
	  static int Encode(const T& t, int fixed_len_size, uint8_t* buffer) {
	    const int num_bytes = ByteSize(t);
	    memcpy(buffer, &t, num_bytes);
	    return num_bytes;
	  }

	  /// Reads one value from 'buffer' into '*v' without touching 'buffer_end' or anything
	  /// past it. 'buffer' does not have to be aligned. 'fixed_len_size' is the value size
	  /// for FIXED_LEN_BYTE_ARRAY types and is ignored by this generic version.
	  /// Returns the number of bytes consumed, or -1 if the value could not be decoded.
	  template <typename T>
	  static int Decode(const uint8_t* buffer, const uint8_t* buffer_end, int fixed_len_size,
	      T* v) {
	    const int num_bytes = ByteSize(*v);
	    const bool truncated = buffer_end - buffer < num_bytes;
	    if (UNLIKELY(truncated)) return -1;
	    memcpy(v, buffer, num_bytes);
	    return num_bytes;
	  }
	};

	/// Calling this with arguments of type ColumnType is certainly a programmer error, so we
	/// disallow it: the specialization is declared but given no definition in this file, so
	/// the generic sizeof(T) version is never instantiated for a ColumnType.
	/// NOTE(review): presumably no other translation unit defines it either, which would
	/// turn any use into a link-time error -- confirm.
	template <> int ParquetPlainEncoder::ByteSize(const ColumnType& t);

	/// Disable for bools. Plain encoding is not used for booleans. These are likewise
	/// declarations only; no definitions appear in this file.
	template <> int ParquetPlainEncoder::ByteSize(const bool& b);
	template <> int ParquetPlainEncoder::Encode(const bool&, int fixed_len_size, uint8_t*);
	// The two buffer parameters are pointers, matching the primary Decode template; a
	// by-value uint8_t signature would not specialize any member template.
	template <> int ParquetPlainEncoder::Decode(const uint8_t*, const uint8_t*,
	    int fixed_len_size, bool* v);

	/// Not used for decimals since the plain encoding encodes them using
	/// FIXED_LEN_BYTE_ARRAY; their encoded width is the 'fixed_len_size' argument of
	/// Encode()/Decode() rather than anything derived from the value. Reaching one of
	/// these specializations is a programming error: each trips a DCHECK and returns -1.
	template <>
	inline int ParquetPlainEncoder::ByteSize(const Decimal4Value&) {
	DCHECK(false);
	return -1;
	}
	template <>
	inline int ParquetPlainEncoder::ByteSize(const Decimal8Value&) {
	DCHECK(false);
	return -1;
	}
	template <>
	inline int ParquetPlainEncoder::ByteSize(const Decimal16Value&) {
	DCHECK(false);
	return -1;
	}

	/// Parquet has no 8-bit or 16-bit integer storage, so both widths are reported as
	/// occupying a full 32-bit integer.
	template <>
	inline int ParquetPlainEncoder::ByteSize(const int8_t& v) {
	  return static_cast<int>(sizeof(int32_t));
	}
	template <>
	inline int ParquetPlainEncoder::ByteSize(const int16_t& v) {
	  return static_cast<int>(sizeof(int32_t));
	}

	/// A string is encoded as a 4-byte length prefix followed by 'v.len' bytes of data.
	/// NOTE: the sum is narrowed to int, so it does not survive a 'v.len' within four of
	/// the largest int; callers handling untrusted lengths must check for that themselves.
	template <>
	inline int ParquetPlainEncoder::ByteSize(const StringValue& v) {
	  return static_cast<int>(sizeof(int32_t) + v.len);
	}

	/// Timestamps map to the Parquet INT96 storage type: 96 bits, i.e. 12 bytes.
	template <>
	inline int ParquetPlainEncoder::ByteSize(const TimestampValue& v) {
	  const int int96_bits = 96;
	  return int96_bits / 8;
	}

	/// Decodes an 8-bit value. The encoded form is as wide as ByteSize() reports (a 32-bit
	/// integer), so that many bytes must be available and are reported as consumed, but
	/// only the first encoded byte is kept. 'fixed_len_size' is unused.
	/// Returns the number of bytes consumed, or -1 if too few bytes remain.
	/// NOTE(review): the first byte is the low-order byte only for a little-endian
	/// encoding -- confirm that is what the file format guarantees.
	template <>
	inline int ParquetPlainEncoder::Decode(const uint8_t* buffer, const uint8_t* buffer_end,
	    int fixed_len_size, int8_t* v) {
	  int byte_size = ByteSize(*v);
	  if (UNLIKELY(buffer_end - buffer < byte_size)) return -1;
	  // Store the decoded byte through the output pointer; assigning the pointers
	  // themselves would neither compile nor produce a value.
	  *v = *buffer;
	  return byte_size;
	}
	/// Decodes a 16-bit value. The encoded form is as wide as ByteSize() reports (a 32-bit
	/// integer), so that many bytes must be available and are reported as consumed, but
	/// only the first two encoded bytes are copied into '*v'. 'fixed_len_size' is unused.
	/// Returns the number of bytes consumed, or -1 if too few bytes remain.
	/// NOTE(review): the first two bytes are the low-order half only for a little-endian
	/// encoding on a little-endian host -- confirm.
	template <>
	inline int ParquetPlainEncoder::Decode(const uint8_t* buffer, const uint8_t* buffer_end,
	    int fixed_len_size, int16_t* v) {
	  const int encoded_size = ByteSize(*v);
	  const bool truncated = buffer_end - buffer < encoded_size;
	  if (UNLIKELY(truncated)) return -1;
	  memcpy(v, buffer, sizeof(*v));
	  return encoded_size;
	}

	/// Encodes an 8-bit value by widening it to a 32-bit integer and copying those four
	/// bytes into 'buffer'. 'fixed_len_size' is unused. Returns the bytes written.
	template <>
	inline int ParquetPlainEncoder::Encode(
	    const int8_t& v, int fixed_len_size, uint8_t* buffer) {
	  const int32_t widened = v;
	  memcpy(buffer, &widened, sizeof(widened));
	  return ByteSize(v);
	}

	/// Encodes a 16-bit value by widening it to a 32-bit integer and copying those four
	/// bytes into 'buffer'. 'fixed_len_size' is unused. Returns the bytes written.
	template <>
	inline int ParquetPlainEncoder::Encode(
	    const int16_t& v, int fixed_len_size, uint8_t* buffer) {
	  const int32_t widened = v;
	  memcpy(buffer, &widened, sizeof(widened));
	  return ByteSize(v);
	}

	/// Encodes a string as its 4-byte length followed by 'v.len' bytes copied from 'v.ptr'.
	/// 'fixed_len_size' is unused. Returns the bytes written, as reported by ByteSize().
	/// 'buffer' must have room for the prefix plus the data.
	template <>
	inline int ParquetPlainEncoder::Encode(
	    const StringValue& v, int fixed_len_size, uint8_t* buffer) {
	  uint8_t* const payload = buffer + sizeof(int32_t);
	  memcpy(buffer, &v.len, sizeof(int32_t));
	  memcpy(payload, v.ptr, v.len);
	  return ByteSize(v);
	}

	/// Decodes a length-prefixed string: a 4-byte length followed by that many bytes of
	/// data. 'v->ptr' is pointed at the data inside 'buffer' (nothing is copied), so the
	/// result is only usable while 'buffer' is.
	/// If 'fixed_len_size' is positive the reported length is capped at that value; the
	/// return value still covers the whole encoded string.
	/// Returns the number of bytes consumed, or -1 if the value runs past 'buffer_end',
	/// the length prefix is negative, or the consumed size does not fit in an int.
	/// 'v->len' is overwritten with the raw prefix even when -1 is returned for a bad
	/// length.
	template <>
	inline int ParquetPlainEncoder::Decode(const uint8_t* buffer, const uint8_t* buffer_end,
	    int fixed_len_size, StringValue* v) {
	  // All size arithmetic is done in signed 64 bits. Comparing the pointer difference
	  // with the unsigned sizeof() would turn a negative difference into a huge positive
	  // one, and 4 + len computed in int wraps negative for a length prefix close to the
	  // largest int, which would let a corrupt length slip past the bounds check.
	  const int64_t bytes_available = buffer_end - buffer;
	  const int64_t prefix_size = sizeof(int32_t);
	  if (UNLIKELY(bytes_available < prefix_size)) return -1;
	  memcpy(&v->len, buffer, sizeof(int32_t));
	  if (UNLIKELY(v->len < 0)) return -1;
	  const int64_t byte_size = prefix_size + v->len;
	  if (UNLIKELY(bytes_available < byte_size)) return -1;
	  // The interface returns int; refuse a size that would not survive the narrowing.
	  const int bytes_read = static_cast<int>(byte_size);
	  if (UNLIKELY(bytes_read != byte_size)) return -1;
	  v->ptr = reinterpret_cast<char*>(const_cast<uint8_t*>(buffer)) + sizeof(int32_t);
	  if (fixed_len_size > 0) v->len = std::min(v->len, fixed_len_size);
	  // we still read byte_size bytes, even if we truncate
	  return bytes_read;
	}

	/// Write decimals as big endian (byte comparable) to benefit from common prefixes.
	/// fixed_len_size can be less than sizeof(Decimal*Value) for space savings. This means
	/// that the dropped leading bytes of the in-memory value are pure sign extension:
	/// zeros, or all ones for negative values.
	/// For example, precision 2 fits in 1 byte. All decimals stored as Decimal4Value
	/// will have 3 bytes of leading zeros, we will only store the interesting byte.
	/// Each overload hands the conversion to DecimalUtil::EncodeToFixedLenByteArray and
	/// returns 'fixed_len_size' as the number of bytes written.
	template <>
	inline int ParquetPlainEncoder::Encode(
	const Decimal4Value& v, int fixed_len_size, uint8_t* buffer) {
	DecimalUtil::EncodeToFixedLenByteArray(buffer, fixed_len_size, v);
	return fixed_len_size;
	}

	template <>
	inline int ParquetPlainEncoder::Encode(
	const Decimal8Value& v, int fixed_len_size, uint8_t* buffer) {
	DecimalUtil::EncodeToFixedLenByteArray(buffer, fixed_len_size, v);
	return fixed_len_size;
	}

	template <>
	inline int ParquetPlainEncoder::Encode(
	const Decimal16Value& v, int fixed_len_size, uint8_t* buffer) {
	DecimalUtil::EncodeToFixedLenByteArray(buffer, fixed_len_size, v);
	return fixed_len_size;
	}

	/// Decodes a decimal stored as a FIXED_LEN_BYTE_ARRAY of 'fixed_len_size' bytes.
	/// Returns 'fixed_len_size', or -1 when fewer than that many bytes remain before
	/// 'buffer_end'. The byte-level conversion is delegated to DecimalUtil.
	/// NOTE(review): 'fixed_len_size' is not checked here against zero or sizeof(*v);
	/// presumably the caller or DecimalUtil guarantees a sane value -- confirm.
	template <>
	inline int ParquetPlainEncoder::Decode(const uint8_t* buffer, const uint8_t* buffer_end,
	    int fixed_len_size, Decimal4Value* v) {
	  const bool truncated = buffer_end - buffer < fixed_len_size;
	  if (UNLIKELY(truncated)) return -1;
	  DecimalUtil::DecodeFromFixedLenByteArray(buffer, fixed_len_size, v);
	  return fixed_len_size;
	}

	/// Same contract as the Decimal4Value overload, for Decimal8Value.
	template <>
	inline int ParquetPlainEncoder::Decode(const uint8_t* buffer, const uint8_t* buffer_end,
	    int fixed_len_size, Decimal8Value* v) {
	  const bool truncated = buffer_end - buffer < fixed_len_size;
	  if (UNLIKELY(truncated)) return -1;
	  DecimalUtil::DecodeFromFixedLenByteArray(buffer, fixed_len_size, v);
	  return fixed_len_size;
	}

	/// Same contract as the Decimal4Value overload, for Decimal16Value.
	template <>
	inline int ParquetPlainEncoder::Decode(const uint8_t* buffer, const uint8_t* buffer_end,
	    int fixed_len_size, Decimal16Value* v) {
	  const bool truncated = buffer_end - buffer < fixed_len_size;
	  if (UNLIKELY(truncated)) return -1;
	  DecimalUtil::DecodeFromFixedLenByteArray(buffer, fixed_len_size, v);
	  return fixed_len_size;
	}

	}
	#endif