blob: 39c6732c3231f0bdbb24d07e6d0d19308128a1f7 [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
use super::*;
#[cfg(feature = "simd")]
use packed_simd::*;
#[cfg(feature = "simd")]
use std::ops::{
Add, BitAnd, BitAndAssign, BitOr, BitOrAssign, Div, Mul, Neg, Not, Rem, Sub,
};
/// A subtype of primitive type that represents numeric values.
///
/// SIMD operations are defined in this trait if available on the target system.
#[cfg(feature = "simd")]
pub trait ArrowNumericType: ArrowPrimitiveType
where
    Self::Simd: Add<Output = Self::Simd>
        + Sub<Output = Self::Simd>
        + Mul<Output = Self::Simd>
        + Div<Output = Self::Simd>
        + Rem<Output = Self::Simd>
        + Copy,
    Self::SimdMask: BitAnd<Output = Self::SimdMask>
        + BitOr<Output = Self::SimdMask>
        + BitAndAssign
        + BitOrAssign
        + Not<Output = Self::SimdMask>
        + Copy,
{
    /// Defines the SIMD type that should be used for this numeric type
    type Simd;

    /// Defines the SIMD Mask type that should be used for this numeric type
    type SimdMask;

    /// The number of SIMD lanes available
    fn lanes() -> usize;

    /// Initializes a SIMD register to a constant value
    fn init(value: Self::Native) -> Self::Simd;

    /// Loads a slice into a SIMD register
    fn load(slice: &[Self::Native]) -> Self::Simd;

    /// Creates a new SIMD mask for this SIMD type filling it with `value`
    fn mask_init(value: bool) -> Self::SimdMask;

    /// Creates a new SIMD mask for this SIMD type from the lower-most bits of the given `mask`.
    /// The number of bits used corresponds to the number of lanes of this type
    fn mask_from_u64(mask: u64) -> Self::SimdMask;

    /// Creates a bitmask from the given SIMD mask.
    /// Each bit corresponds to one vector lane, starting with the least-significant bit.
    fn mask_to_u64(mask: &Self::SimdMask) -> u64;

    /// Gets the value of a single lane in a SIMD mask
    fn mask_get(mask: &Self::SimdMask, idx: usize) -> bool;

    /// Sets the value of a single lane of a SIMD mask
    fn mask_set(mask: Self::SimdMask, idx: usize, value: bool) -> Self::SimdMask;

    /// Selects elements of `a` and `b` using `mask`
    fn mask_select(mask: Self::SimdMask, a: Self::Simd, b: Self::Simd) -> Self::Simd;

    /// Returns `true` if any of the lanes in the mask are `true`
    fn mask_any(mask: Self::SimdMask) -> bool;

    /// Performs a SIMD binary operation
    fn bin_op<F: Fn(Self::Simd, Self::Simd) -> Self::Simd>(
        left: Self::Simd,
        right: Self::Simd,
        op: F,
    ) -> Self::Simd;

    /// SIMD version of equal
    fn eq(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;

    /// SIMD version of not equal
    fn ne(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;

    /// SIMD version of less than
    fn lt(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;

    /// SIMD version of less than or equal to
    fn le(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;

    /// SIMD version of greater than
    fn gt(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;

    /// SIMD version of greater than or equal to
    fn ge(left: Self::Simd, right: Self::Simd) -> Self::SimdMask;

    /// Writes a SIMD result back to a slice
    fn write(simd_result: Self::Simd, slice: &mut [Self::Native]);

    /// Performs a SIMD unary operation, applying `op` to the register `a`
    fn unary_op<F: Fn(Self::Simd) -> Self::Simd>(a: Self::Simd, op: F) -> Self::Simd;
}
/// A subtype of primitive type that represents numeric values.
///
/// This is the non-SIMD fallback: without the `simd` feature the trait is a
/// plain marker with no additional methods.
#[cfg(not(feature = "simd"))]
pub trait ArrowNumericType: ArrowPrimitiveType {}
/// Implements [`ArrowNumericType`] for a concrete Arrow primitive type.
///
/// `$impl_ty` is the Arrow type (e.g. `Int32Type`), `$native_ty` its native Rust
/// representation, and `$simd_ty`/`$simd_mask_ty` the `packed_simd` vector and mask
/// types to use. All vector types chosen in this file are 512 bits (64 bytes) wide,
/// so the lane count is always 64 / size_of::<$native_ty>().
macro_rules! make_numeric_type {
    ($impl_ty:ty, $native_ty:ty, $simd_ty:ident, $simd_mask_ty:ident) => {
        #[cfg(feature = "simd")]
        impl ArrowNumericType for $impl_ty {
            type Simd = $simd_ty;
            type SimdMask = $simd_mask_ty;

            #[inline]
            fn lanes() -> usize {
                Self::Simd::lanes()
            }

            #[inline]
            fn init(value: Self::Native) -> Self::Simd {
                Self::Simd::splat(value)
            }

            #[inline]
            fn load(slice: &[Self::Native]) -> Self::Simd {
                // NOTE(review): unchecked load — callers must pass a slice with at
                // least `lanes()` elements or this is undefined behavior.
                unsafe { Self::Simd::from_slice_unaligned_unchecked(slice) }
            }

            #[inline]
            fn mask_init(value: bool) -> Self::SimdMask {
                Self::SimdMask::splat(value)
            }

            #[inline]
            fn mask_from_u64(mask: u64) -> Self::SimdMask {
                // this match will get removed by the compiler since the number of lanes is known at
                // compile-time for each concrete numeric type
                match Self::lanes() {
                    8 => {
                        // the bit position in each lane indicates the index of that lane
                        let vecidx = i64x8::new(1, 2, 4, 8, 16, 32, 64, 128);

                        // broadcast the lowermost 8 bits of mask to each lane
                        let vecmask = i64x8::splat((mask & 0xFF) as i64);
                        // compute whether the bit corresponding to each lanes index is set
                        let vecmask = (vecidx & vecmask).eq(vecidx);

                        // transmute is necessary because the different match arms return different
                        // mask types, at runtime only one of those expressions will exist per type,
                        // with the type being equal to `SimdMask`.
                        unsafe { std::mem::transmute(vecmask) }
                    }
                    16 => {
                        // same general logic as for 8 lanes, extended to 16 bits
                        let vecidx = i32x16::new(
                            1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096,
                            8192, 16384, 32768,
                        );

                        let vecmask = i32x16::splat((mask & 0xFFFF) as i32);
                        let vecmask = (vecidx & vecmask).eq(vecidx);

                        unsafe { std::mem::transmute(vecmask) }
                    }
                    32 => {
                        // compute two separate m32x16 vector masks from the lower-most 32 bits of `mask`
                        // and then combine them into one m16x32 vector mask by writing and reading a temporary
                        let tmp = &mut [0_i16; 32];

                        let vecidx = i32x16::new(
                            1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096,
                            8192, 16384, 32768,
                        );

                        let vecmask = i32x16::splat((mask & 0xFFFF) as i32);
                        let vecmask = (vecidx & vecmask).eq(vecidx);

                        i16x16::from_cast(vecmask)
                            .write_to_slice_unaligned(&mut tmp[0..16]);

                        let vecmask = i32x16::splat(((mask >> 16) & 0xFFFF) as i32);
                        let vecmask = (vecidx & vecmask).eq(vecidx);

                        i16x16::from_cast(vecmask)
                            .write_to_slice_unaligned(&mut tmp[16..32]);

                        unsafe { std::mem::transmute(i16x32::from_slice_unaligned(tmp)) }
                    }
                    64 => {
                        // compute four m32x16 vector masks from all 64 bits of `mask`
                        // and convert them into one m8x64 vector mask by writing and reading a temporary
                        let tmp = &mut [0_i8; 64];

                        let vecidx = i32x16::new(
                            1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096,
                            8192, 16384, 32768,
                        );

                        let vecmask = i32x16::splat((mask & 0xFFFF) as i32);
                        let vecmask = (vecidx & vecmask).eq(vecidx);

                        i8x16::from_cast(vecmask)
                            .write_to_slice_unaligned(&mut tmp[0..16]);

                        let vecmask = i32x16::splat(((mask >> 16) & 0xFFFF) as i32);
                        let vecmask = (vecidx & vecmask).eq(vecidx);

                        i8x16::from_cast(vecmask)
                            .write_to_slice_unaligned(&mut tmp[16..32]);

                        let vecmask = i32x16::splat(((mask >> 32) & 0xFFFF) as i32);
                        let vecmask = (vecidx & vecmask).eq(vecidx);

                        i8x16::from_cast(vecmask)
                            .write_to_slice_unaligned(&mut tmp[32..48]);

                        let vecmask = i32x16::splat(((mask >> 48) & 0xFFFF) as i32);
                        let vecmask = (vecidx & vecmask).eq(vecidx);

                        i8x16::from_cast(vecmask)
                            .write_to_slice_unaligned(&mut tmp[48..64]);

                        unsafe { std::mem::transmute(i8x64::from_slice_unaligned(tmp)) }
                    }
                    // all concrete types in this file have 8, 16, 32 or 64 lanes
                    _ => panic!("Invalid number of vector lanes"),
                }
            }

            #[inline]
            fn mask_to_u64(mask: &Self::SimdMask) -> u64 {
                mask.bitmask() as u64
            }

            #[inline]
            fn mask_get(mask: &Self::SimdMask, idx: usize) -> bool {
                // NOTE(review): unchecked access — `idx` must be < `lanes()`.
                unsafe { mask.extract_unchecked(idx) }
            }

            #[inline]
            fn mask_set(mask: Self::SimdMask, idx: usize, value: bool) -> Self::SimdMask {
                // NOTE(review): unchecked access — `idx` must be < `lanes()`.
                unsafe { mask.replace_unchecked(idx, value) }
            }

            /// Selects elements of `a` and `b` using `mask`
            #[inline]
            fn mask_select(
                mask: Self::SimdMask,
                a: Self::Simd,
                b: Self::Simd,
            ) -> Self::Simd {
                mask.select(a, b)
            }

            #[inline]
            fn mask_any(mask: Self::SimdMask) -> bool {
                mask.any()
            }

            #[inline]
            fn bin_op<F: Fn(Self::Simd, Self::Simd) -> Self::Simd>(
                left: Self::Simd,
                right: Self::Simd,
                op: F,
            ) -> Self::Simd {
                op(left, right)
            }

            #[inline]
            fn eq(left: Self::Simd, right: Self::Simd) -> Self::SimdMask {
                left.eq(right)
            }

            #[inline]
            fn ne(left: Self::Simd, right: Self::Simd) -> Self::SimdMask {
                left.ne(right)
            }

            #[inline]
            fn lt(left: Self::Simd, right: Self::Simd) -> Self::SimdMask {
                left.lt(right)
            }

            #[inline]
            fn le(left: Self::Simd, right: Self::Simd) -> Self::SimdMask {
                left.le(right)
            }

            #[inline]
            fn gt(left: Self::Simd, right: Self::Simd) -> Self::SimdMask {
                left.gt(right)
            }

            #[inline]
            fn ge(left: Self::Simd, right: Self::Simd) -> Self::SimdMask {
                left.ge(right)
            }

            #[inline]
            fn write(simd_result: Self::Simd, slice: &mut [Self::Native]) {
                // NOTE(review): unchecked store — `slice` must hold at least `lanes()` elements.
                unsafe { simd_result.write_to_slice_unaligned_unchecked(slice) };
            }

            #[inline]
            fn unary_op<F: Fn(Self::Simd) -> Self::Simd>(
                a: Self::Simd,
                op: F,
            ) -> Self::Simd {
                op(a)
            }
        }

        #[cfg(not(feature = "simd"))]
        impl ArrowNumericType for $impl_ty {}
    };
}
// Integer and floating-point primitives. Each vector type is 512 bits wide,
// so narrower natives get proportionally more lanes (i8 -> 64 lanes, i64 -> 8).
make_numeric_type!(Int8Type, i8, i8x64, m8x64);
make_numeric_type!(Int16Type, i16, i16x32, m16x32);
make_numeric_type!(Int32Type, i32, i32x16, m32x16);
make_numeric_type!(Int64Type, i64, i64x8, m64x8);
make_numeric_type!(UInt8Type, u8, u8x64, m8x64);
make_numeric_type!(UInt16Type, u16, u16x32, m16x32);
make_numeric_type!(UInt32Type, u32, u32x16, m32x16);
make_numeric_type!(UInt64Type, u64, u64x8, m64x8);
make_numeric_type!(Float32Type, f32, f32x16, m32x16);
make_numeric_type!(Float64Type, f64, f64x8, m64x8);

// Temporal types reuse the SIMD vectors of their underlying integer representation.
make_numeric_type!(TimestampSecondType, i64, i64x8, m64x8);
make_numeric_type!(TimestampMillisecondType, i64, i64x8, m64x8);
make_numeric_type!(TimestampMicrosecondType, i64, i64x8, m64x8);
make_numeric_type!(TimestampNanosecondType, i64, i64x8, m64x8);
make_numeric_type!(Date32Type, i32, i32x16, m32x16);
make_numeric_type!(Date64Type, i64, i64x8, m64x8);
make_numeric_type!(Time32SecondType, i32, i32x16, m32x16);
make_numeric_type!(Time32MillisecondType, i32, i32x16, m32x16);
make_numeric_type!(Time64MicrosecondType, i64, i64x8, m64x8);
make_numeric_type!(Time64NanosecondType, i64, i64x8, m64x8);
make_numeric_type!(IntervalYearMonthType, i32, i32x16, m32x16);
make_numeric_type!(IntervalDayTimeType, i64, i64x8, m64x8);
make_numeric_type!(DurationSecondType, i64, i64x8, m64x8);
make_numeric_type!(DurationMillisecondType, i64, i64x8, m64x8);
make_numeric_type!(DurationMicrosecondType, i64, i64x8, m64x8);
make_numeric_type!(DurationNanosecondType, i64, i64x8, m64x8);
/// A subtype of primitive type that represents signed numeric values.
///
/// SIMD operations are defined in this trait if available on the target system.
#[cfg(feature = "simd")]
pub trait ArrowSignedNumericType: ArrowNumericType
where
    Self::SignedSimd: Neg<Output = Self::SignedSimd>,
{
    /// Defines the SIMD type that should be used for this signed numeric type
    type SignedSimd;

    /// Loads a slice of signed numeric type into a SIMD register
    fn load_signed(slice: &[Self::Native]) -> Self::SignedSimd;

    /// Performs a SIMD unary operation on signed numeric type
    fn signed_unary_op<F: Fn(Self::SignedSimd) -> Self::SignedSimd>(
        a: Self::SignedSimd,
        op: F,
    ) -> Self::SignedSimd;

    /// Writes a signed SIMD result back to a slice
    fn write_signed(simd_result: Self::SignedSimd, slice: &mut [Self::Native]);
}
/// A subtype of primitive type that represents signed numeric values.
///
/// Non-SIMD fallback: only requires that the native type is negatable.
#[cfg(not(feature = "simd"))]
pub trait ArrowSignedNumericType: ArrowNumericType
where
    Self::Native: std::ops::Neg<Output = Self::Native>,
{
}
/// Implements [`ArrowSignedNumericType`] for a concrete signed Arrow primitive type,
/// wiring it to the given `packed_simd` vector type `$simd_ty`.
macro_rules! make_signed_numeric_type {
    ($impl_ty:ty, $simd_ty:ident) => {
        #[cfg(feature = "simd")]
        impl ArrowSignedNumericType for $impl_ty {
            type SignedSimd = $simd_ty;

            #[inline]
            fn load_signed(slice: &[Self::Native]) -> Self::SignedSimd {
                // NOTE(review): unchecked load — `slice` must hold at least one full
                // vector's worth of elements.
                unsafe { Self::SignedSimd::from_slice_unaligned_unchecked(slice) }
            }

            #[inline]
            fn signed_unary_op<F: Fn(Self::SignedSimd) -> Self::SignedSimd>(
                a: Self::SignedSimd,
                op: F,
            ) -> Self::SignedSimd {
                op(a)
            }

            #[inline]
            fn write_signed(simd_result: Self::SignedSimd, slice: &mut [Self::Native]) {
                // NOTE(review): unchecked store — `slice` must hold at least one full
                // vector's worth of elements.
                unsafe { simd_result.write_to_slice_unaligned_unchecked(slice) };
            }
        }

        #[cfg(not(feature = "simd"))]
        impl ArrowSignedNumericType for $impl_ty {}
    };
}
// Only the signed integer and floating-point primitives support negation.
make_signed_numeric_type!(Int8Type, i8x64);
make_signed_numeric_type!(Int16Type, i16x32);
make_signed_numeric_type!(Int32Type, i32x16);
make_signed_numeric_type!(Int64Type, i64x8);
make_signed_numeric_type!(Float32Type, f32x16);
make_signed_numeric_type!(Float64Type, f64x8);
/// A subtype of primitive type that represents floating-point numeric values.
///
/// SIMD operations are defined in this trait if available on the target system.
#[cfg(feature = "simd")]
pub trait ArrowFloatNumericType: ArrowNumericType {
    /// SIMD version of `powf`: raises each lane of `base` to the power of the
    /// corresponding lane of `raise`
    fn pow(base: Self::Simd, raise: Self::Simd) -> Self::Simd;
}
/// A subtype of primitive type that represents floating-point numeric values.
///
/// Non-SIMD fallback: a plain marker trait.
#[cfg(not(feature = "simd"))]
pub trait ArrowFloatNumericType: ArrowNumericType {}
/// Implements [`ArrowFloatNumericType`] for a concrete floating-point Arrow type,
/// delegating `pow` to the `packed_simd` lane-wise `powf`.
macro_rules! make_float_numeric_type {
    ($impl_ty:ty, $simd_ty:ident) => {
        #[cfg(feature = "simd")]
        impl ArrowFloatNumericType for $impl_ty {
            #[inline]
            fn pow(base: Self::Simd, raise: Self::Simd) -> Self::Simd {
                base.powf(raise)
            }
        }

        #[cfg(not(feature = "simd"))]
        impl ArrowFloatNumericType for $impl_ty {}
    };
}
// The two Arrow floating-point primitives.
make_float_numeric_type!(Float32Type, f32x16);
make_float_numeric_type!(Float64Type, f64x8);
#[cfg(all(test, simd_x86))]
mod tests {
    use crate::datatypes::{
        ArrowNumericType, Float32Type, Float64Type, Int32Type, Int64Type, Int8Type,
        UInt16Type,
    };
    use packed_simd::*;
    use FromCast;

    /// Calculate the expected mask by iterating over all bits:
    /// each set bit of `$MASK` becomes an all-ones (-1) lane of type `$T`.
    macro_rules! expected_mask {
        ($T:ty, $MASK:expr) => {{
            let mask = $MASK;
            // simd width of all types is currently 64 bytes -> 512 bits
            let lanes = 64 / std::mem::size_of::<$T>();
            // translate each set bit into a value of all ones (-1) of the correct type
            (0..lanes)
                .map(|i| (if (mask & (1 << i)) != 0 { -1 } else { 0 }))
                .collect::<Vec<$T>>()
        }};
    }

    #[test]
    fn test_mask_f64() {
        let mask = 0b10101010;
        let actual = Float64Type::mask_from_u64(mask);
        let expected = expected_mask!(i64, mask);
        let expected = m64x8::from_cast(i64x8::from_slice_unaligned(expected.as_slice()));

        assert_eq!(expected, actual);
    }

    #[test]
    fn test_mask_i64() {
        let mask = 0b01010101;
        let actual = Int64Type::mask_from_u64(mask);
        let expected = expected_mask!(i64, mask);
        let expected = m64x8::from_cast(i64x8::from_slice_unaligned(expected.as_slice()));

        assert_eq!(expected, actual);
    }

    #[test]
    fn test_mask_f32() {
        let mask = 0b10101010_10101010;
        let actual = Float32Type::mask_from_u64(mask);
        let expected = expected_mask!(i32, mask);
        let expected =
            m32x16::from_cast(i32x16::from_slice_unaligned(expected.as_slice()));

        assert_eq!(expected, actual);
    }

    #[test]
    fn test_mask_i32() {
        let mask = 0b01010101_01010101;
        let actual = Int32Type::mask_from_u64(mask);
        let expected = expected_mask!(i32, mask);
        let expected =
            m32x16::from_cast(i32x16::from_slice_unaligned(expected.as_slice()));

        assert_eq!(expected, actual);
    }

    #[test]
    fn test_mask_u16() {
        let mask = 0b01010101_01010101_10101010_10101010;
        let actual = UInt16Type::mask_from_u64(mask);
        let expected = expected_mask!(i16, mask);
        let expected =
            m16x32::from_cast(i16x32::from_slice_unaligned(expected.as_slice()));

        assert_eq!(expected, actual);
    }

    #[test]
    fn test_mask_i8() {
        let mask =
            0b01010101_01010101_10101010_10101010_01010101_01010101_10101010_10101010;
        let actual = Int8Type::mask_from_u64(mask);
        let expected = expected_mask!(i8, mask);
        let expected = m8x64::from_cast(i8x64::from_slice_unaligned(expected.as_slice()));

        assert_eq!(expected, actual);
    }
}