| // Licensed to the Apache Software Foundation (ASF) under one |
| // or more contributor license agreements. See the NOTICE file |
| // distributed with this work for additional information |
| // regarding copyright ownership. The ASF licenses this file |
| // to you under the Apache License, Version 2.0 (the |
| // "License"); you may not use this file except in compliance |
| // with the License. You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, |
| // software distributed under the License is distributed on an |
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| // KIND, either express or implied. See the License for the |
| // specific language governing permissions and limitations |
| // under the License. |
| |
| #[cfg(feature = "simd")] |
| use packed_simd::*; |
| #[cfg(feature = "simd")] |
| use std::ops::{Add, BitAnd, BitAndAssign, BitOr, BitOrAssign, Div, Mul, Neg, Not, Sub}; |
| |
| use super::*; |
| |
/// A subtype of primitive type that represents numeric values.
///
/// SIMD operations are defined in this trait if available on the target system.
// NOTE(review): the imports above are gated on `feature = "simd"` while this item is
// gated on `cfg(simd)` — presumably a build script sets the `simd` cfg from the
// feature flag; confirm, otherwise the gates can drift apart.
#[cfg(simd)]
pub trait ArrowNumericType: ArrowPrimitiveType
where
    Self::Simd: Add<Output = Self::Simd>
        + Sub<Output = Self::Simd>
        + Mul<Output = Self::Simd>
        + Div<Output = Self::Simd>
        + Copy,
    Self::SimdMask: BitAnd<Output = Self::SimdMask>
        + BitOr<Output = Self::SimdMask>
        + BitAndAssign
        + BitOrAssign
        + Not<Output = Self::SimdMask>
        + Copy,
{
    /// Defines the SIMD type that should be used for this numeric type
    type Simd;

    /// Defines the SIMD Mask type that should be used for this numeric type
    type SimdMask;

    /// The number of SIMD lanes available
    fn lanes() -> usize;

    /// Initializes a SIMD register to a constant value
    fn init(value: Self::Native) -> Self::Simd;

    /// Loads a slice into a SIMD register
    fn load(slice: &[Self::Native]) -> Self::Simd;

    /// Creates a new SIMD mask for this SIMD type filling it with `value`
    fn mask_init(value: bool) -> Self::SimdMask;

    /// Creates a new SIMD mask for this SIMD type from the lower-most bits of the given `mask`.
    /// The number of bits used corresponds to the number of lanes of this type
    fn mask_from_u64(mask: u64) -> Self::SimdMask;

    /// Creates a bitmask from the given SIMD mask.
    /// Each bit corresponds to one vector lane, starting with the least-significant bit.
    fn mask_to_u64(mask: &Self::SimdMask) -> u64;

    /// Gets the value of a single lane in a SIMD mask
    fn mask_get(mask: &Self::SimdMask, idx: usize) -> bool;

    /// Sets the value of a single lane of a SIMD mask
    fn mask_set(mask: Self::SimdMask, idx: usize, value: bool) -> Self::SimdMask;

    /// Selects elements of `a` and `b` using `mask`
    fn mask_select(mask: Self::SimdMask, a: Self::Simd, b: Self::Simd) -> Self::Simd;

    /// Returns `true` if any of the lanes in the mask are `true`
    fn mask_any(mask: Self::SimdMask) -> bool;

    /// Performs a SIMD binary operation
    fn bin_op<F: Fn(Self::Simd, Self::Simd) -> Self::Simd>(
        left: Self::Simd,
        right: Self::Simd,
        op: F,
    ) -> Self::Simd;

    /// SIMD version of equal
    fn eq(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;

    /// SIMD version of not equal
    fn ne(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;

    /// SIMD version of less than
    fn lt(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;

    /// SIMD version of less than or equal to
    fn le(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;

    /// SIMD version of greater than
    fn gt(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;

    /// SIMD version of greater than or equal to
    fn ge(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;

    /// Writes a SIMD result back to a slice
    fn write(simd_result: Self::Simd, slice: &mut [Self::Native]);

    /// Performs a SIMD unary operation
    fn unary_op<F: Fn(Self::Simd) -> Self::Simd>(a: Self::Simd, op: F) -> Self::Simd;
}
| |
/// A subtype of primitive type that represents numeric values.
///
/// Non-SIMD fallback: a plain marker trait with no extra requirements.
#[cfg(not(simd))]
pub trait ArrowNumericType: ArrowPrimitiveType {}
| |
/// Generates the [`ArrowNumericType`] implementation for a concrete primitive type.
///
/// NOTE(review): `$native_ty` is not used by the expansion; it only documents the
/// native type backing `$impl_ty` at the invocation site.
macro_rules! make_numeric_type {
    ($impl_ty:ty, $native_ty:ty, $simd_ty:ident, $simd_mask_ty:ident) => {
        #[cfg(simd)]
        impl ArrowNumericType for $impl_ty {
            type Simd = $simd_ty;

            type SimdMask = $simd_mask_ty;

            #[inline]
            fn lanes() -> usize {
                Self::Simd::lanes()
            }

            #[inline]
            fn init(value: Self::Native) -> Self::Simd {
                Self::Simd::splat(value)
            }

            #[inline]
            fn load(slice: &[Self::Native]) -> Self::Simd {
                // SAFETY(review): skips the length check — callers must pass a slice with
                // at least `lanes()` elements; confirm all call sites uphold this.
                unsafe { Self::Simd::from_slice_unaligned_unchecked(slice) }
            }

            #[inline]
            fn mask_init(value: bool) -> Self::SimdMask {
                Self::SimdMask::splat(value)
            }

            #[inline]
            fn mask_from_u64(mask: u64) -> Self::SimdMask {
                // this match will get removed by the compiler since the number of lanes is known at
                // compile-time for each concrete numeric type
                match Self::lanes() {
                    8 => {
                        // the bit position in each lane indicates the index of that lane
                        let vecidx = i64x8::new(1, 2, 4, 8, 16, 32, 64, 128);

                        // broadcast the lowermost 8 bits of mask to each lane
                        let vecmask = i64x8::splat((mask & 0xFF) as i64);
                        // compute whether the bit corresponding to each lane's index is set
                        let vecmask = (vecidx & vecmask).eq(vecidx);

                        // transmute is necessary because the different match arms return different
                        // mask types, at runtime only one of those expressions will exist per type,
                        // with the type being equal to `SimdMask`.
                        unsafe { std::mem::transmute(vecmask) }
                    }
                    16 => {
                        // same general logic as for 8 lanes, extended to 16 bits
                        let vecidx = i32x16::new(
                            1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096,
                            8192, 16384, 32768,
                        );

                        let vecmask = i32x16::splat((mask & 0xFFFF) as i32);
                        let vecmask = (vecidx & vecmask).eq(vecidx);

                        unsafe { std::mem::transmute(vecmask) }
                    }
                    32 => {
                        // compute two separate m32x16 vector masks from the lower-most 32 bits of `mask`
                        // and then combine them into one m16x32 vector mask by writing and reading a temporary
                        let tmp = &mut [0_i16; 32];

                        let vecidx = i32x16::new(
                            1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096,
                            8192, 16384, 32768,
                        );

                        // lower 16 bits -> lanes 0..16
                        let vecmask = i32x16::splat((mask & 0xFFFF) as i32);
                        let vecmask = (vecidx & vecmask).eq(vecidx);

                        i16x16::from_cast(vecmask)
                            .write_to_slice_unaligned(&mut tmp[0..16]);

                        // next 16 bits -> lanes 16..32
                        let vecmask = i32x16::splat(((mask >> 16) & 0xFFFF) as i32);
                        let vecmask = (vecidx & vecmask).eq(vecidx);

                        i16x16::from_cast(vecmask)
                            .write_to_slice_unaligned(&mut tmp[16..32]);

                        unsafe { std::mem::transmute(i16x32::from_slice_unaligned(tmp)) }
                    }
                    64 => {
                        // compute four m32x16 vector masks from all 64 bits of `mask`
                        // and convert them into one m8x64 vector mask by writing and reading a temporary
                        let tmp = &mut [0_i8; 64];

                        let vecidx = i32x16::new(
                            1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096,
                            8192, 16384, 32768,
                        );

                        // bits 0..16 -> lanes 0..16
                        let vecmask = i32x16::splat((mask & 0xFFFF) as i32);
                        let vecmask = (vecidx & vecmask).eq(vecidx);

                        i8x16::from_cast(vecmask)
                            .write_to_slice_unaligned(&mut tmp[0..16]);

                        // bits 16..32 -> lanes 16..32
                        let vecmask = i32x16::splat(((mask >> 16) & 0xFFFF) as i32);
                        let vecmask = (vecidx & vecmask).eq(vecidx);

                        i8x16::from_cast(vecmask)
                            .write_to_slice_unaligned(&mut tmp[16..32]);

                        // bits 32..48 -> lanes 32..48
                        let vecmask = i32x16::splat(((mask >> 32) & 0xFFFF) as i32);
                        let vecmask = (vecidx & vecmask).eq(vecidx);

                        i8x16::from_cast(vecmask)
                            .write_to_slice_unaligned(&mut tmp[32..48]);

                        // bits 48..64 -> lanes 48..64
                        let vecmask = i32x16::splat(((mask >> 48) & 0xFFFF) as i32);
                        let vecmask = (vecidx & vecmask).eq(vecidx);

                        i8x16::from_cast(vecmask)
                            .write_to_slice_unaligned(&mut tmp[48..64]);

                        unsafe { std::mem::transmute(i8x64::from_slice_unaligned(tmp)) }
                    }
                    _ => panic!("Invalid number of vector lanes"),
                }
            }

            #[inline]
            fn mask_to_u64(mask: &Self::SimdMask) -> u64 {
                mask.bitmask() as u64
            }

            #[inline]
            fn mask_get(mask: &Self::SimdMask, idx: usize) -> bool {
                // SAFETY(review): no bounds check — `idx` must be < `lanes()`.
                unsafe { mask.extract_unchecked(idx) }
            }

            #[inline]
            fn mask_set(mask: Self::SimdMask, idx: usize, value: bool) -> Self::SimdMask {
                // SAFETY(review): no bounds check — `idx` must be < `lanes()`.
                unsafe { mask.replace_unchecked(idx, value) }
            }

            /// Selects elements of `a` and `b` using `mask`
            #[inline]
            fn mask_select(
                mask: Self::SimdMask,
                a: Self::Simd,
                b: Self::Simd,
            ) -> Self::Simd {
                mask.select(a, b)
            }

            #[inline]
            fn mask_any(mask: Self::SimdMask) -> bool {
                mask.any()
            }

            #[inline]
            fn bin_op<F: Fn(Self::Simd, Self::Simd) -> Self::Simd>(
                left: Self::Simd,
                right: Self::Simd,
                op: F,
            ) -> Self::Simd {
                op(left, right)
            }

            #[inline]
            fn eq(left: Self::Simd, right: Self::Simd) -> Self::SimdMask {
                left.eq(right)
            }

            #[inline]
            fn ne(left: Self::Simd, right: Self::Simd) -> Self::SimdMask {
                left.ne(right)
            }

            #[inline]
            fn lt(left: Self::Simd, right: Self::Simd) -> Self::SimdMask {
                left.lt(right)
            }

            #[inline]
            fn le(left: Self::Simd, right: Self::Simd) -> Self::SimdMask {
                left.le(right)
            }

            #[inline]
            fn gt(left: Self::Simd, right: Self::Simd) -> Self::SimdMask {
                left.gt(right)
            }

            #[inline]
            fn ge(left: Self::Simd, right: Self::Simd) -> Self::SimdMask {
                left.ge(right)
            }

            #[inline]
            fn write(simd_result: Self::Simd, slice: &mut [Self::Native]) {
                // SAFETY(review): skips the length check — `slice` must hold at least
                // `lanes()` elements.
                unsafe { simd_result.write_to_slice_unaligned_unchecked(slice) };
            }

            #[inline]
            fn unary_op<F: Fn(Self::Simd) -> Self::Simd>(
                a: Self::Simd,
                op: F,
            ) -> Self::Simd {
                op(a)
            }
        }

        #[cfg(not(simd))]
        impl ArrowNumericType for $impl_ty {}
    };
}
| |
// Primitive integer and floating-point types. The SIMD vector width is 512 bits
// for every type, so the lane count varies with the native type's size.
make_numeric_type!(Int8Type, i8, i8x64, m8x64);
make_numeric_type!(Int16Type, i16, i16x32, m16x32);
make_numeric_type!(Int32Type, i32, i32x16, m32x16);
make_numeric_type!(Int64Type, i64, i64x8, m64x8);
make_numeric_type!(UInt8Type, u8, u8x64, m8x64);
make_numeric_type!(UInt16Type, u16, u16x32, m16x32);
make_numeric_type!(UInt32Type, u32, u32x16, m32x16);
make_numeric_type!(UInt64Type, u64, u64x8, m64x8);
make_numeric_type!(Float32Type, f32, f32x16, m32x16);
make_numeric_type!(Float64Type, f64, f64x8, m64x8);

// Temporal types reuse the SIMD types of their underlying i32/i64 representation.
make_numeric_type!(TimestampSecondType, i64, i64x8, m64x8);
make_numeric_type!(TimestampMillisecondType, i64, i64x8, m64x8);
make_numeric_type!(TimestampMicrosecondType, i64, i64x8, m64x8);
make_numeric_type!(TimestampNanosecondType, i64, i64x8, m64x8);
make_numeric_type!(Date32Type, i32, i32x16, m32x16);
make_numeric_type!(Date64Type, i64, i64x8, m64x8);
make_numeric_type!(Time32SecondType, i32, i32x16, m32x16);
make_numeric_type!(Time32MillisecondType, i32, i32x16, m32x16);
make_numeric_type!(Time64MicrosecondType, i64, i64x8, m64x8);
make_numeric_type!(Time64NanosecondType, i64, i64x8, m64x8);
make_numeric_type!(IntervalYearMonthType, i32, i32x16, m32x16);
make_numeric_type!(IntervalDayTimeType, i64, i64x8, m64x8);
make_numeric_type!(DurationSecondType, i64, i64x8, m64x8);
make_numeric_type!(DurationMillisecondType, i64, i64x8, m64x8);
make_numeric_type!(DurationMicrosecondType, i64, i64x8, m64x8);
make_numeric_type!(DurationNanosecondType, i64, i64x8, m64x8);
| |
/// A subtype of primitive type that represents signed numeric values.
///
/// SIMD operations are defined in this trait if available on the target system.
#[cfg(simd)]
pub trait ArrowSignedNumericType: ArrowNumericType
where
    // the signed SIMD type must support lane-wise negation
    Self::SignedSimd: Neg<Output = Self::SignedSimd>,
{
    /// Defines the SIMD type that should be used for this numeric type
    type SignedSimd;

    /// Loads a slice of signed numeric type into a SIMD register
    fn load_signed(slice: &[Self::Native]) -> Self::SignedSimd;

    /// Performs a SIMD unary operation on signed numeric type
    fn signed_unary_op<F: Fn(Self::SignedSimd) -> Self::SignedSimd>(
        a: Self::SignedSimd,
        op: F,
    ) -> Self::SignedSimd;

    /// Writes a signed SIMD result back to a slice
    fn write_signed(simd_result: Self::SignedSimd, slice: &mut [Self::Native]);
}
| |
/// A subtype of primitive type that represents signed numeric values.
///
/// Non-SIMD fallback: requires only that the native type supports negation.
#[cfg(not(simd))]
pub trait ArrowSignedNumericType: ArrowNumericType
where
    Self::Native: std::ops::Neg<Output = Self::Native>,
{
}
| |
/// Generates the [`ArrowSignedNumericType`] implementation for a concrete
/// signed primitive type, using `$simd_ty` as its signed SIMD register type.
macro_rules! make_signed_numeric_type {
    ($impl_ty:ty, $simd_ty:ident) => {
        #[cfg(simd)]
        impl ArrowSignedNumericType for $impl_ty {
            type SignedSimd = $simd_ty;

            #[inline]
            fn load_signed(slice: &[Self::Native]) -> Self::SignedSimd {
                // SAFETY(review): skips the length check — callers must pass a slice with
                // at least `lanes()` elements.
                unsafe { Self::SignedSimd::from_slice_unaligned_unchecked(slice) }
            }

            #[inline]
            fn signed_unary_op<F: Fn(Self::SignedSimd) -> Self::SignedSimd>(
                a: Self::SignedSimd,
                op: F,
            ) -> Self::SignedSimd {
                op(a)
            }

            #[inline]
            fn write_signed(simd_result: Self::SignedSimd, slice: &mut [Self::Native]) {
                // SAFETY(review): skips the length check — `slice` must hold at least
                // `lanes()` elements.
                unsafe { simd_result.write_to_slice_unaligned_unchecked(slice) };
            }
        }

        #[cfg(not(simd))]
        impl ArrowSignedNumericType for $impl_ty {}
    };
}
| |
// Only signed integer and floating-point types support lane-wise negation.
make_signed_numeric_type!(Int8Type, i8x64);
make_signed_numeric_type!(Int16Type, i16x32);
make_signed_numeric_type!(Int32Type, i32x16);
make_signed_numeric_type!(Int64Type, i64x8);
make_signed_numeric_type!(Float32Type, f32x16);
make_signed_numeric_type!(Float64Type, f64x8);
| |
/// A subtype of primitive type that represents floating-point numeric values.
///
/// SIMD operations are defined in this trait if available on the target system.
#[cfg(simd)]
pub trait ArrowFloatNumericType: ArrowNumericType {
    /// SIMD version of raising `base` to the power of `raise`, lane-wise
    fn pow(base: Self::Simd, raise: Self::Simd) -> Self::Simd;
}
| |
/// A subtype of primitive type that represents floating-point numeric values.
///
/// Non-SIMD fallback: a plain marker trait with no extra requirements.
#[cfg(not(simd))]
pub trait ArrowFloatNumericType: ArrowNumericType {}
| |
/// Generates the [`ArrowFloatNumericType`] implementation for a concrete
/// floating-point primitive type.
///
/// NOTE(review): `$simd_ty` is not used by the expansion (`Self::Simd` comes from
/// the `ArrowNumericType` impl); it only documents the SIMD type at the call site.
macro_rules! make_float_numeric_type {
    ($impl_ty:ty, $simd_ty:ident) => {
        #[cfg(simd)]
        impl ArrowFloatNumericType for $impl_ty {
            #[inline]
            fn pow(base: Self::Simd, raise: Self::Simd) -> Self::Simd {
                base.powf(raise)
            }
        }

        #[cfg(not(simd))]
        impl ArrowFloatNumericType for $impl_ty {}
    };
}
| |
// Floating-point types gain the `pow` operation.
make_float_numeric_type!(Float32Type, f32x16);
make_float_numeric_type!(Float64Type, f64x8);
| |
#[cfg(all(test, simd_x86))]
mod tests {
    use crate::datatypes::{
        ArrowNumericType, Float32Type, Float64Type, Int32Type, Int64Type, Int8Type,
        UInt16Type,
    };
    use packed_simd::*;
    use FromCast;

    /// Calculates the expected mask by iterating over all bits:
    /// each set bit of `$MASK` becomes an all-ones (-1) lane value of type `$T`,
    /// each clear bit becomes 0.
    macro_rules! expected_mask {
        ($T:ty, $MASK:expr) => {{
            let mask = $MASK;
            // simd width of all types is currently 64 bytes -> 512 bits
            let lanes = 64 / std::mem::size_of::<$T>();
            // translate each set bit into a value of all ones (-1) of the correct type
            (0..lanes)
                .map(|i| (if (mask & (1 << i)) != 0 { -1 } else { 0 }))
                .collect::<Vec<$T>>()
        }};
    }

    #[test]
    fn test_mask_f64() {
        let mask = 0b10101010;
        let actual = Float64Type::mask_from_u64(mask);
        let expected = expected_mask!(i64, mask);
        let expected = m64x8::from_cast(i64x8::from_slice_unaligned(expected.as_slice()));

        assert_eq!(expected, actual);
    }

    // Renamed from `test_mask_u64`: this test exercises `Int64Type`, not an
    // unsigned type, so the old name was misleading.
    #[test]
    fn test_mask_i64() {
        let mask = 0b01010101;
        let actual = Int64Type::mask_from_u64(mask);
        let expected = expected_mask!(i64, mask);
        let expected = m64x8::from_cast(i64x8::from_slice_unaligned(expected.as_slice()));

        assert_eq!(expected, actual);
    }

    #[test]
    fn test_mask_f32() {
        let mask = 0b10101010_10101010;
        let actual = Float32Type::mask_from_u64(mask);
        let expected = expected_mask!(i32, mask);
        let expected =
            m32x16::from_cast(i32x16::from_slice_unaligned(expected.as_slice()));

        assert_eq!(expected, actual);
    }

    #[test]
    fn test_mask_i32() {
        let mask = 0b01010101_01010101;
        let actual = Int32Type::mask_from_u64(mask);
        let expected = expected_mask!(i32, mask);
        let expected =
            m32x16::from_cast(i32x16::from_slice_unaligned(expected.as_slice()));

        assert_eq!(expected, actual);
    }

    #[test]
    fn test_mask_u16() {
        let mask = 0b01010101_01010101_10101010_10101010;
        let actual = UInt16Type::mask_from_u64(mask);
        let expected = expected_mask!(i16, mask);
        // (removed leftover `dbg!(&expected)` debug output)
        let expected =
            m16x32::from_cast(i16x32::from_slice_unaligned(expected.as_slice()));

        assert_eq!(expected, actual);
    }

    #[test]
    fn test_mask_i8() {
        let mask =
            0b01010101_01010101_10101010_10101010_01010101_01010101_10101010_10101010;
        let actual = Int8Type::mask_from_u64(mask);
        let expected = expected_mask!(i8, mask);
        let expected = m8x64::from_cast(i8x64::from_slice_unaligned(expected.as_slice()));

        assert_eq!(expected, actual);
    }
}