blob: 9d88149ee96e72e890cc08a1e9ea64b18c275d1f [file] [log] [blame]
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
#[cfg(feature = "simd")]
use crate::util::bit_util;
#[cfg(feature = "simd")]
use packed_simd::u8x64;
#[cfg(feature = "avx512")]
use crate::arch::avx512::*;
use crate::util::bit_util::ceil;
#[cfg(any(feature = "simd", feature = "avx512"))]
use std::borrow::BorrowMut;
use super::{Buffer, MutableBuffer};
/// Apply a bitwise operation `simd_op` / `scalar_op` to two inputs using simd instructions and return the result as a Buffer.
/// The `simd_op` function gets applied on chunks of 64 bytes (512 bits) at a time
/// and the `scalar_op` gets applied to remaining bytes.
/// Contrary to the non-simd version `bitwise_bin_op_helper`, the offset and length is specified in bytes
/// and this version does not support operations starting at arbitrary bit offsets.
#[cfg(feature = "simd")]
pub fn bitwise_bin_op_simd_helper<F_SIMD, F_SCALAR>(
    left: &Buffer,
    left_offset: usize,
    right: &Buffer,
    right_offset: usize,
    len: usize,
    simd_op: F_SIMD,
    scalar_op: F_SCALAR,
) -> Buffer
where
    F_SIMD: Fn(u8x64, u8x64) -> u8x64,
    F_SCALAR: Fn(u8, u8) -> u8,
{
    // Allocate a zero-initialized output buffer of `len` bytes.
    let mut result = MutableBuffer::new(len).with_bitset(len, false);
    let lanes = u8x64::lanes();
    // Iterate in exact 64-byte chunks. `borrow_mut` lets us zip the iterators
    // by `&mut` so they can still be queried for their remainders afterwards
    // (a plain `zip` would consume them by value).
    let mut left_chunks = left.as_slice()[left_offset..].chunks_exact(lanes);
    let mut right_chunks = right.as_slice()[right_offset..].chunks_exact(lanes);
    let mut result_chunks = result.as_slice_mut().chunks_exact_mut(lanes);
    result_chunks
        .borrow_mut()
        .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut()))
        .for_each(|(res, (left, right))| {
            // SAFETY: `chunks_exact` guarantees every slice here is exactly
            // `lanes` (64) bytes long, matching what the SIMD helper expects.
            unsafe { bit_util::bitwise_bin_op_simd(&left, &right, res, &simd_op) };
        });
    // Apply the scalar op to the tail bytes that did not fill a full chunk.
    result_chunks
        .into_remainder()
        .iter_mut()
        .zip(
            left_chunks
                .remainder()
                .iter()
                .zip(right_chunks.remainder().iter()),
        )
        .for_each(|(res, (left, right))| {
            *res = scalar_op(*left, *right);
        });
    result.into()
}
/// Apply a bitwise operation `simd_op` / `scalar_op` to one input using simd instructions and return the result as a Buffer.
/// The `simd_op` function gets applied on chunks of 64 bytes (512 bits) at a time
/// and the `scalar_op` gets applied to remaining bytes.
/// Contrary to the non-simd version `bitwise_unary_op_helper`, the offset and length is specified in bytes
/// and this version does not support operations starting at arbitrary bit offsets.
#[cfg(feature = "simd")]
pub fn bitwise_unary_op_simd_helper<F_SIMD, F_SCALAR>(
    left: &Buffer,
    left_offset: usize,
    len: usize,
    simd_op: F_SIMD,
    scalar_op: F_SCALAR,
) -> Buffer
where
    F_SIMD: Fn(u8x64) -> u8x64,
    F_SCALAR: Fn(u8) -> u8,
{
    // Allocate a zero-initialized output buffer of `len` bytes.
    let mut result = MutableBuffer::new(len).with_bitset(len, false);
    let lanes = u8x64::lanes();
    // `borrow_mut` lets us zip the chunk iterators by `&mut` so they can
    // still be queried for their remainders afterwards.
    let mut left_chunks = left.as_slice()[left_offset..].chunks_exact(lanes);
    let mut result_chunks = result.as_slice_mut().chunks_exact_mut(lanes);
    result_chunks
        .borrow_mut()
        .zip(left_chunks.borrow_mut())
        .for_each(|(res, left)| unsafe {
            // SAFETY: `chunks_exact` guarantees `left` and `res` are exactly
            // `lanes` (64) bytes long, so the unchecked unaligned load and
            // store stay in bounds.
            let data_simd = u8x64::from_slice_unaligned_unchecked(left);
            let simd_result = simd_op(data_simd);
            simd_result.write_to_slice_unaligned_unchecked(res);
        });
    // Apply the scalar op to the tail bytes that did not fill a full chunk.
    result_chunks
        .into_remainder()
        .iter_mut()
        .zip(left_chunks.remainder().iter())
        .for_each(|(res, left)| {
            *res = scalar_op(*left);
        });
    result.into()
}
/// Apply a bitwise operation `op` to two inputs and return the result as a Buffer.
/// The inputs are treated as bitmaps, meaning that offsets and length are specified in number of bits.
pub fn bitwise_bin_op_helper<F>(
    left: &Buffer,
    left_offset_in_bits: usize,
    right: &Buffer,
    right_offset_in_bits: usize,
    len_in_bits: usize,
    op: F,
) -> Buffer
where
    F: Fn(u64, u64) -> u64,
{
    let lhs_chunks = left.bit_chunks(left_offset_in_bits, len_in_bits);
    let rhs_chunks = right.bit_chunks(right_offset_in_bits, len_in_bits);

    // Combine the full 64-bit chunks of both inputs with `op`.
    let combined = lhs_chunks
        .iter()
        .zip(rhs_chunks.iter())
        .map(|(l, r)| op(l, r));
    // Soundness: `BitChunks` is a trusted len iterator
    let mut out = unsafe { MutableBuffer::from_trusted_len_iter(combined) };

    // Handle the trailing bits (fewer than 64): apply `op` to the remainder
    // bits of both sides and append only the bytes that hold valid bits.
    let tail = op(lhs_chunks.remainder_bits(), rhs_chunks.remainder_bits());
    let tail_byte_len = ceil(lhs_chunks.remainder_len(), 8);
    // Bits are counted from the least significant end, so little-endian byte
    // order preserves the bit layout.
    out.extend_from_slice(&tail.to_le_bytes()[..tail_byte_len]);

    out.into()
}
/// Apply a bitwise operation `op` to one input and return the result as a Buffer.
/// The input is treated as a bitmap, meaning that offset and length are specified in number of bits.
pub fn bitwise_unary_op_helper<F>(
    left: &Buffer,
    offset_in_bits: usize,
    len_in_bits: usize,
    op: F,
) -> Buffer
where
    F: Fn(u64) -> u64,
{
    // Reserve capacity for the whole result, but set the length to cover only
    // the complete 64-bit chunks so `typed_data_mut::<u64>` yields exactly
    // those; the tail bytes are appended afterwards.
    let full_chunk_bytes = len_in_bits / 64 * 8;
    let mut result =
        MutableBuffer::new(ceil(len_in_bits, 8)).with_bitset(full_chunk_bytes, false);

    let chunks = left.bit_chunks(offset_in_bits, len_in_bits);
    for (slot, chunk) in result.typed_data_mut::<u64>().iter_mut().zip(chunks.iter()) {
        *slot = op(chunk);
    }

    // Handle the trailing bits (fewer than 64): apply `op` and append only
    // the bytes that hold valid bits. Bits are counted from the least
    // significant end, so little-endian byte order preserves the bit layout.
    let tail = op(chunks.remainder_bits());
    let tail_byte_len = ceil(chunks.remainder_len(), 8);
    result.extend_from_slice(&tail.to_le_bytes()[..tail_byte_len]);

    result.into()
}
/// Apply a bitwise AND to two input buffers and return the result as a Buffer.
/// The inputs are treated as bitmaps: offsets and length are specified in bits.
/// This variant uses AVX512 instructions when all arguments are byte aligned.
#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
pub fn buffer_bin_and(
    left: &Buffer,
    left_offset_in_bits: usize,
    right: &Buffer,
    right_offset_in_bits: usize,
    len_in_bits: usize,
) -> Buffer {
    // Fast path: everything byte aligned, so we can work on whole bytes in
    // 64-byte AVX512 lanes.
    if left_offset_in_bits % 8 == 0
        && right_offset_in_bits % 8 == 0
        && len_in_bits % 8 == 0
    {
        let len = len_in_bits / 8;
        let left_offset = left_offset_in_bits / 8;
        let right_offset = right_offset_in_bits / 8;
        // Zero-initialized output of `len` bytes.
        let mut result = MutableBuffer::new(len).with_bitset(len, false);
        // `borrow_mut` lets us zip the chunk iterators by `&mut` so they can
        // still be queried for their remainders afterwards.
        let mut left_chunks =
            left.as_slice()[left_offset..].chunks_exact(AVX512_U8X64_LANES);
        let mut right_chunks =
            right.as_slice()[right_offset..].chunks_exact(AVX512_U8X64_LANES);
        let mut result_chunks =
            result.as_slice_mut().chunks_exact_mut(AVX512_U8X64_LANES);
        result_chunks
            .borrow_mut()
            .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut()))
            .for_each(|(res, (left, right))| unsafe {
                // SAFETY: `chunks_exact` guarantees each slice is exactly
                // AVX512_U8X64_LANES bytes, as the intrinsic wrapper expects.
                avx512_bin_and(left, right, res);
            });
        // Scalar AND over the tail bytes that did not fill a full lane.
        result_chunks
            .into_remainder()
            .iter_mut()
            .zip(
                left_chunks
                    .remainder()
                    .iter()
                    .zip(right_chunks.remainder().iter()),
            )
            .for_each(|(res, (left, right))| {
                *res = *left & *right;
            });
        result.into()
    } else {
        // Unaligned case: fall back to the bit-granular helper.
        bitwise_bin_op_helper(
            &left,
            left_offset_in_bits,
            right,
            right_offset_in_bits,
            len_in_bits,
            |a, b| a & b,
        )
    }
}
/// Apply a bitwise AND to two input buffers and return the result as a Buffer.
/// The inputs are treated as bitmaps: offsets and length are specified in bits.
/// This variant uses 512-bit SIMD when all arguments are byte aligned.
#[cfg(all(feature = "simd", not(feature = "avx512")))]
pub fn buffer_bin_and(
    left: &Buffer,
    left_offset_in_bits: usize,
    right: &Buffer,
    right_offset_in_bits: usize,
    len_in_bits: usize,
) -> Buffer {
    let byte_aligned = left_offset_in_bits % 8 == 0
        && right_offset_in_bits % 8 == 0
        && len_in_bits % 8 == 0;

    if !byte_aligned {
        // Unaligned offsets/length: use the bit-granular helper.
        return bitwise_bin_op_helper(
            &left,
            left_offset_in_bits,
            right,
            right_offset_in_bits,
            len_in_bits,
            |a, b| a & b,
        );
    }

    // Everything is byte aligned, so convert bit offsets/length to bytes and
    // let the SIMD helper process whole bytes.
    bitwise_bin_op_simd_helper(
        &left,
        left_offset_in_bits / 8,
        &right,
        right_offset_in_bits / 8,
        len_in_bits / 8,
        |a, b| a & b,
        |a, b| a & b,
    )
}
// Note: do not target specific features like x86 without considering
// other targets like wasm32, as those would fail to build
/// Apply a bitwise AND to two input buffers and return the result as a Buffer.
/// The inputs are treated as bitmaps: offsets and length are specified in bits.
/// Scalar fallback used when neither the "simd" nor "avx512" feature is enabled.
#[cfg(all(not(any(feature = "simd", feature = "avx512"))))]
pub fn buffer_bin_and(
    left: &Buffer,
    left_offset_in_bits: usize,
    right: &Buffer,
    right_offset_in_bits: usize,
    len_in_bits: usize,
) -> Buffer {
    // `left` is already a `&Buffer`, so pass it directly — the original
    // `&left` was a needless double reference (clippy::needless_borrow).
    bitwise_bin_op_helper(
        left,
        left_offset_in_bits,
        right,
        right_offset_in_bits,
        len_in_bits,
        |a, b| a & b,
    )
}
/// Apply a bitwise OR to two input buffers and return the result as a Buffer.
/// The inputs are treated as bitmaps: offsets and length are specified in bits.
/// This variant uses AVX512 instructions when all arguments are byte aligned.
#[cfg(all(target_arch = "x86_64", feature = "avx512"))]
pub fn buffer_bin_or(
    left: &Buffer,
    left_offset_in_bits: usize,
    right: &Buffer,
    right_offset_in_bits: usize,
    len_in_bits: usize,
) -> Buffer {
    // Fast path: everything byte aligned, so we can work on whole bytes in
    // 64-byte AVX512 lanes.
    if left_offset_in_bits % 8 == 0
        && right_offset_in_bits % 8 == 0
        && len_in_bits % 8 == 0
    {
        let len = len_in_bits / 8;
        let left_offset = left_offset_in_bits / 8;
        let right_offset = right_offset_in_bits / 8;
        // Zero-initialized output of `len` bytes.
        let mut result = MutableBuffer::new(len).with_bitset(len, false);
        // `borrow_mut` lets us zip the chunk iterators by `&mut` so they can
        // still be queried for their remainders afterwards.
        let mut left_chunks =
            left.as_slice()[left_offset..].chunks_exact(AVX512_U8X64_LANES);
        let mut right_chunks =
            right.as_slice()[right_offset..].chunks_exact(AVX512_U8X64_LANES);
        let mut result_chunks =
            result.as_slice_mut().chunks_exact_mut(AVX512_U8X64_LANES);
        result_chunks
            .borrow_mut()
            .zip(left_chunks.borrow_mut().zip(right_chunks.borrow_mut()))
            .for_each(|(res, (left, right))| unsafe {
                // SAFETY: `chunks_exact` guarantees each slice is exactly
                // AVX512_U8X64_LANES bytes, as the intrinsic wrapper expects.
                avx512_bin_or(left, right, res);
            });
        // Scalar OR over the tail bytes that did not fill a full lane.
        result_chunks
            .into_remainder()
            .iter_mut()
            .zip(
                left_chunks
                    .remainder()
                    .iter()
                    .zip(right_chunks.remainder().iter()),
            )
            .for_each(|(res, (left, right))| {
                *res = *left | *right;
            });
        result.into()
    } else {
        // Unaligned case: fall back to the bit-granular helper.
        bitwise_bin_op_helper(
            &left,
            left_offset_in_bits,
            right,
            right_offset_in_bits,
            len_in_bits,
            |a, b| a | b,
        )
    }
}
/// Apply a bitwise OR to two input buffers and return the result as a Buffer.
/// The inputs are treated as bitmaps: offsets and length are specified in bits.
/// This variant uses 512-bit SIMD when all arguments are byte aligned.
#[cfg(all(feature = "simd", not(feature = "avx512")))]
pub fn buffer_bin_or(
    left: &Buffer,
    left_offset_in_bits: usize,
    right: &Buffer,
    right_offset_in_bits: usize,
    len_in_bits: usize,
) -> Buffer {
    let byte_aligned = left_offset_in_bits % 8 == 0
        && right_offset_in_bits % 8 == 0
        && len_in_bits % 8 == 0;

    if !byte_aligned {
        // Unaligned offsets/length: use the bit-granular helper.
        return bitwise_bin_op_helper(
            &left,
            left_offset_in_bits,
            right,
            right_offset_in_bits,
            len_in_bits,
            |a, b| a | b,
        );
    }

    // Everything is byte aligned, so convert bit offsets/length to bytes and
    // let the SIMD helper process whole bytes.
    bitwise_bin_op_simd_helper(
        &left,
        left_offset_in_bits / 8,
        &right,
        right_offset_in_bits / 8,
        len_in_bits / 8,
        |a, b| a | b,
        |a, b| a | b,
    )
}
/// Apply a bitwise OR to two input buffers and return the result as a Buffer.
/// The inputs are treated as bitmaps: offsets and length are specified in bits.
/// Scalar fallback used when neither the "simd" nor "avx512" feature is enabled.
#[cfg(all(not(any(feature = "simd", feature = "avx512"))))]
pub fn buffer_bin_or(
    left: &Buffer,
    left_offset_in_bits: usize,
    right: &Buffer,
    right_offset_in_bits: usize,
    len_in_bits: usize,
) -> Buffer {
    // `left` is already a `&Buffer`, so pass it directly — the original
    // `&left` was a needless double reference (clippy::needless_borrow).
    bitwise_bin_op_helper(
        left,
        left_offset_in_bits,
        right,
        right_offset_in_bits,
        len_in_bits,
        |a, b| a | b,
    )
}
/// Apply a bitwise NOT to one input buffer and return the result as a Buffer.
/// The input is treated as a bitmap: offset and length are specified in bits.
pub fn buffer_unary_not(
    left: &Buffer,
    offset_in_bits: usize,
    len_in_bits: usize,
) -> Buffer {
    // SIMD implementation if available and byte-aligned
    #[cfg(feature = "simd")]
    if offset_in_bits % 8 == 0 && len_in_bits % 8 == 0 {
        // `left` is already a `&Buffer`; the original `&left` was a needless
        // double reference (clippy::needless_borrow).
        return bitwise_unary_op_simd_helper(
            left,
            offset_in_bits / 8,
            len_in_bits / 8,
            |a| !a,
            |a| !a,
        );
    }
    // Default implementation: bit-granular path, also the fallback for
    // unaligned inputs when "simd" is enabled.
    #[allow(unreachable_code)]
    {
        bitwise_unary_op_helper(left, offset_in_bits, len_in_bits, |a| !a)
    }
}