// c++/src/BpackingAvx512.hh - orc - Git at Google

 /**
  * Licensed to the Apache Software Foundation (ASF) under one
  * or more contributor license agreements.  See the NOTICE file
  * distributed with this work for additional information
  * regarding copyright ownership.  The ASF licenses this file
  * to you under the Apache License, Version 2.0 (the
  * "License"); you may not use this file except in compliance
  * with the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 #ifndef ORC_BPACKINGAVX512_HH
 #define ORC_BPACKINGAVX512_HH

 #include <cstdint>
 #include <cstdlib>

 #include "BpackingDefault.hh"

 namespace orc {

 // Preprocessor macros are not scoped by the enclosing namespace, so these names are visible
 // to every file that includes this header.
 //
 // VECTOR_UNPACK_*_MAX_NUM: NOTE(review): each count times its element width (64 * 8, 32 * 16,
 // 16 * 32) equals 512, presumably the number of elements in one 512-bit vector -- confirm.
 // Only VECTOR_UNPACK_32BIT_MAX_NUM is used in this header (it sizes UnpackAvx512::vectorBuf).
 #define VECTOR_UNPACK_8BIT_MAX_NUM 64
 #define VECTOR_UNPACK_16BIT_MAX_NUM 32
 #define VECTOR_UNPACK_32BIT_MAX_NUM 16
 // UNPACK_*_MAX_SIZE: bit sizes 8, 16 and 32. NOTE(review): presumably the values passed as
 // bitMaxSize to UnpackAvx512::alignHeaderBoundary -- confirm against the callers.
 #define UNPACK_8Bit_MAX_SIZE 8
 #define UNPACK_16Bit_MAX_SIZE 16
 #define UNPACK_32Bit_MAX_SIZE 32

   // Forward declaration: this header only uses RleDecoderV2 through pointers.
   class RleDecoderV2;

   /**
    * Declares the AVX512 bit-unpacking routines: one vectorUnpackN member per declared width
    * suffix, a plain fallback (plainUnpackLongs), and two templated helpers that deal with the
    * head and the tail of the input around the vectorized part.
    */
   class UnpackAvx512 {
    public:
     UnpackAvx512(RleDecoderV2* dec);
     ~UnpackAvx512();

     // Per-width unpackers; all of them share the signature (data, offset, len).
     // NOTE(review): the numeric suffix is presumably the packed bit width, and data/offset/len
     // presumably the output array, the first output index and the element count -- confirm in
     // the implementation file. No vectorUnpack8, 25, 27, 29 or 31 is declared here.
     void vectorUnpack1(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack2(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack3(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack4(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack5(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack6(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack7(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack9(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack10(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack11(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack12(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack13(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack14(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack15(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack16(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack17(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack18(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack19(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack20(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack21(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack22(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack23(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack24(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack26(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack28(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack30(int64_t* data, uint64_t offset, uint64_t len);
     void vectorUnpack32(int64_t* data, uint64_t offset, uint64_t len);

     // Takes the same leading parameters as the vectorUnpackN members, plus fbs and startBit.
     // startBit is a non-const reference, so the callee can update the caller's value.
     // NOTE(review): fbs is presumably the bit width of one packed value -- confirm.
     void plainUnpackLongs(int64_t* data, uint64_t offset, uint64_t len, uint64_t fbs,
                           uint64_t& startBit);

     /**
      * Head-of-input helper for the AVX512 unpacking path.
      *
      * The AVX512 instructions used for unpacking can only process memory-aligned data. When the
      * input is not aligned (@p startBit != 0), the unaligned leading part has to be processed
      * first; afterwards the AVX512 instructions can be used on the aligned data.
      *
      * @tparam hasBitOffset Whether the data being processed can start at a bit offset inside a
      * byte. It is false for bit widths that are a multiple of 8 (such data never has a bit
      * offset inside a byte) and true for every other bit width.
      * @param bitWidth Bit width of the data being unpacked.
      * @param bitMaxSize Maximum bit size (a multiple of 8) needed for the data being unpacked.
      * @param startBit Start bit position inside one byte.
      * @param bufMoveByteLen Number of bytes of the current buffer that are processed/moved
      * during unpacking.
      * @param bufRestByteLen Number of bytes left in the current buffer after unpacking.
      * @param remainingNumElements Number of elements that still need to be processed after
      * unpacking.
      * @param tailBitLen Length of the tail bits after unpacking.
      * @param backupByteLen Backup byte length after unpacking.
      * @param numElements Number of elements that currently need to be processed.
      * @param resetBuf Set when the current buffer has been fully processed and needs to be
      * reset.
      * @param srcPtr Pointer to the source data.
      * @param dstPtr Pointer to the destination data.
      */
     template <bool hasBitOffset>
     inline void alignHeaderBoundary(uint32_t bitWidth, uint32_t bitMaxSize, uint64_t& startBit,
                                     uint64_t& bufMoveByteLen, uint64_t& bufRestByteLen,
                                     uint64_t& remainingNumElements, uint64_t& tailBitLen,
                                     uint32_t& backupByteLen, uint64_t& numElements, bool& resetBuf,
                                     const uint8_t*& srcPtr, int64_t*& dstPtr);

     /**
      * Tail-of-input helper for the AVX512 unpacking path.
      *
      * After the AVX512 unpacking there can be scattered data that has not been processed yet;
      * it has to be processed the default way.
      *
      * @tparam hasBitOffset Whether the data being processed can start at a bit offset inside a
      * byte. It is false for bit widths that are a multiple of 8 (such data never has a bit
      * offset inside a byte) and true for every other bit width.
      * @param bitWidth Bit width of the data being unpacked.
      * @param specialBit The special bit width: widths that are a multiple of 8 use different
      * unpackDefault functions than the other widths.
      * @param startBit Start bit position inside one byte.
      * @param bufMoveByteLen Number of bytes of the current buffer that are processed/moved
      * during unpacking.
      * @param bufRestByteLen Number of bytes left in the current buffer after unpacking.
      * @param remainingNumElements Number of elements that still need to be processed after
      * unpacking.
      * @param backupByteLen Backup byte length after unpacking.
      * @param numElements Number of elements that currently need to be processed.
      * @param resetBuf Set when the current buffer has been fully processed and needs to be
      * reset.
      * @param srcPtr Pointer to the source data.
      * @param dstPtr Pointer to the destination data.
      */
     template <bool hasBitOffset>
     inline void alignTailerBoundary(uint32_t bitWidth, uint32_t specialBit, uint64_t& startBit,
                                     uint64_t& bufMoveByteLen, uint64_t& bufRestByteLen,
                                     uint64_t& remainingNumElements, uint32_t& backupByteLen,
                                     uint64_t& numElements, bool& resetBuf, const uint8_t*& srcPtr,
                                     int64_t*& dstPtr);

    private:
     // NOTE(review): raw pointer; presumably the decoder passed to the constructor and not
     // owned by this object -- confirm in the implementation file.
     RleDecoderV2* decoder;
     // Held by value; the alignTailerBoundary notes refer to its unpackDefault functions.
     UnpackDefault unpackDefault;

     // Scratch array used by the vectorized bit-unpacking
     // (VECTOR_UNPACK_32BIT_MAX_NUM + 1 entries of uint32_t).
     uint32_t vectorBuf[VECTOR_UNPACK_32BIT_MAX_NUM + 1];
   };

   /**
    * Publicly derives from BitUnpack and declares a single static member, readLongs.
    * NOTE(review): BitUnpack is not declared in this header; presumably it comes from
    * "BpackingDefault.hh" -- confirm.
    */
   class BitUnpackAVX512 : public BitUnpack {
    public:
     // Static, so it is called without a BitUnpackAVX512 instance. Takes the decoder plus the
     // same (data, offset, len, fbs) list that UnpackAvx512::plainUnpackLongs starts with.
     // NOTE(review): presumably selects one of the UnpackAvx512::vectorUnpackN members based on
     // fbs -- confirm in the implementation file.
     static void readLongs(RleDecoderV2* decoder, int64_t* data, uint64_t offset, uint64_t len,
                           uint64_t fbs);
   };

 }  // namespace orc

 #endif
	/**
	* Licensed to the Apache Software Foundation (ASF) under one
	* or more contributor license agreements. See the NOTICE file
	* distributed with this work for additional information
	* regarding copyright ownership. The ASF licenses this file
	* to you under the Apache License, Version 2.0 (the
	* "License"); you may not use this file except in compliance
	* with the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	#ifndef ORC_BPACKINGAVX512_HH
	#define ORC_BPACKINGAVX512_HH

	#include <cstdint>
	#include <cstdlib>

	#include "BpackingDefault.hh"

	namespace orc {

	// Preprocessor macros are not scoped by the enclosing namespace, so these names are visible
	// to every file that includes this header.
	//
	// VECTOR_UNPACK_*_MAX_NUM: NOTE(review): each count times its element width (64 * 8, 32 * 16,
	// 16 * 32) equals 512, presumably the number of elements in one 512-bit vector -- confirm.
	// Only VECTOR_UNPACK_32BIT_MAX_NUM is used in this header (it sizes UnpackAvx512::vectorBuf).
	#define VECTOR_UNPACK_8BIT_MAX_NUM 64
	#define VECTOR_UNPACK_16BIT_MAX_NUM 32
	#define VECTOR_UNPACK_32BIT_MAX_NUM 16
	// UNPACK_*_MAX_SIZE: bit sizes 8, 16 and 32. NOTE(review): presumably the values passed as
	// bitMaxSize to UnpackAvx512::alignHeaderBoundary -- confirm against the callers.
	#define UNPACK_8Bit_MAX_SIZE 8
	#define UNPACK_16Bit_MAX_SIZE 16
	#define UNPACK_32Bit_MAX_SIZE 32

	// Forward declaration: this header only uses RleDecoderV2 through pointers.
	class RleDecoderV2;

	/**
	 * Declares the AVX512 bit-unpacking routines: one vectorUnpackN member per declared width
	 * suffix, a plain fallback (plainUnpackLongs), and two templated helpers that deal with the
	 * head and the tail of the input around the vectorized part.
	 */
	class UnpackAvx512 {
	 public:
	  UnpackAvx512(RleDecoderV2* dec);
	  ~UnpackAvx512();

	  // Per-width unpackers; all of them share the signature (data, offset, len).
	  // NOTE(review): the numeric suffix is presumably the packed bit width, and data/offset/len
	  // presumably the output array, the first output index and the element count -- confirm in
	  // the implementation file. No vectorUnpack8, 25, 27, 29 or 31 is declared here.
	  void vectorUnpack1(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack2(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack3(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack4(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack5(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack6(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack7(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack9(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack10(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack11(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack12(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack13(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack14(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack15(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack16(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack17(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack18(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack19(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack20(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack21(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack22(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack23(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack24(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack26(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack28(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack30(int64_t* data, uint64_t offset, uint64_t len);
	  void vectorUnpack32(int64_t* data, uint64_t offset, uint64_t len);

	  // Takes the same leading parameters as the vectorUnpackN members, plus fbs and startBit.
	  // startBit is a non-const reference, so the callee can update the caller's value.
	  // NOTE(review): fbs is presumably the bit width of one packed value -- confirm.
	  void plainUnpackLongs(int64_t* data, uint64_t offset, uint64_t len, uint64_t fbs,
	                        uint64_t& startBit);

	  /**
	   * Head-of-input helper for the AVX512 unpacking path.
	   *
	   * The AVX512 instructions used for unpacking can only process memory-aligned data. When the
	   * input is not aligned (@p startBit != 0), the unaligned leading part has to be processed
	   * first; afterwards the AVX512 instructions can be used on the aligned data.
	   *
	   * @tparam hasBitOffset Whether the data being processed can start at a bit offset inside a
	   * byte. It is false for bit widths that are a multiple of 8 (such data never has a bit
	   * offset inside a byte) and true for every other bit width.
	   * @param bitWidth Bit width of the data being unpacked.
	   * @param bitMaxSize Maximum bit size (a multiple of 8) needed for the data being unpacked.
	   * @param startBit Start bit position inside one byte.
	   * @param bufMoveByteLen Number of bytes of the current buffer that are processed/moved
	   * during unpacking.
	   * @param bufRestByteLen Number of bytes left in the current buffer after unpacking.
	   * @param remainingNumElements Number of elements that still need to be processed after
	   * unpacking.
	   * @param tailBitLen Length of the tail bits after unpacking.
	   * @param backupByteLen Backup byte length after unpacking.
	   * @param numElements Number of elements that currently need to be processed.
	   * @param resetBuf Set when the current buffer has been fully processed and needs to be
	   * reset.
	   * @param srcPtr Pointer to the source data.
	   * @param dstPtr Pointer to the destination data.
	   */
	  template <bool hasBitOffset>
	  inline void alignHeaderBoundary(const uint32_t bitWidth, const uint32_t bitMaxSize,
	                                  uint64_t& startBit, uint64_t& bufMoveByteLen,
	                                  uint64_t& bufRestByteLen, uint64_t& remainingNumElements,
	                                  uint64_t& tailBitLen, uint32_t& backupByteLen,
	                                  uint64_t& numElements, bool& resetBuf, const uint8_t*& srcPtr,
	                                  int64_t*& dstPtr);

	  /**
	   * Tail-of-input helper for the AVX512 unpacking path.
	   *
	   * After the AVX512 unpacking there can be scattered data that has not been processed yet;
	   * it has to be processed the default way.
	   *
	   * @tparam hasBitOffset Whether the data being processed can start at a bit offset inside a
	   * byte. It is false for bit widths that are a multiple of 8 (such data never has a bit
	   * offset inside a byte) and true for every other bit width.
	   * @param bitWidth Bit width of the data being unpacked.
	   * @param specialBit The special bit width: widths that are a multiple of 8 use different
	   * unpackDefault functions than the other widths.
	   * @param startBit Start bit position inside one byte.
	   * @param bufMoveByteLen Number of bytes of the current buffer that are processed/moved
	   * during unpacking.
	   * @param bufRestByteLen Number of bytes left in the current buffer after unpacking.
	   * @param remainingNumElements Number of elements that still need to be processed after
	   * unpacking.
	   * @param backupByteLen Backup byte length after unpacking.
	   * @param numElements Number of elements that currently need to be processed.
	   * @param resetBuf Set when the current buffer has been fully processed and needs to be
	   * reset.
	   * @param srcPtr Pointer to the source data.
	   * @param dstPtr Pointer to the destination data.
	   */
	  // srcPtr and dstPtr are references to pointers, the same types alignHeaderBoundary uses
	  // for its parameters of the same names.
	  template <bool hasBitOffset>
	  inline void alignTailerBoundary(const uint32_t bitWidth, const uint32_t specialBit,
	                                  uint64_t& startBit, uint64_t& bufMoveByteLen,
	                                  uint64_t& bufRestByteLen, uint64_t& remainingNumElements,
	                                  uint32_t& backupByteLen, uint64_t& numElements, bool& resetBuf,
	                                  const uint8_t*& srcPtr, int64_t*& dstPtr);

	 private:
	  // NOTE(review): raw pointer; presumably the decoder passed to the constructor and not
	  // owned by this object -- confirm in the implementation file.
	  RleDecoderV2* decoder;
	  // Held by value; the alignTailerBoundary notes refer to its unpackDefault functions.
	  UnpackDefault unpackDefault;

	  // Scratch array used by the vectorized bit-unpacking
	  // (VECTOR_UNPACK_32BIT_MAX_NUM + 1 entries of uint32_t).
	  uint32_t vectorBuf[VECTOR_UNPACK_32BIT_MAX_NUM + 1];
	};

	/**
	 * Publicly derives from BitUnpack and declares a single static member, readLongs.
	 * NOTE(review): BitUnpack is not declared in this header; presumably it comes from
	 * "BpackingDefault.hh" -- confirm.
	 */
	class BitUnpackAVX512 : public BitUnpack {
	public:
	// Static, so it is called without a BitUnpackAVX512 instance. Takes the decoder plus the
	// same (data, offset, len, fbs) list that UnpackAvx512::plainUnpackLongs starts with.
	// NOTE(review): presumably selects one of the UnpackAvx512::vectorUnpackN members based on
	// fbs -- confirm in the implementation file.
	static void readLongs(RleDecoderV2* decoder, int64_t* data, uint64_t offset, uint64_t len,
	uint64_t fbs);
	};

	} // namespace orc

	#endif