blob: 2df018706e8cb62d38e34ae19df8fc0c50ed5ac3 [file] [log] [blame]
extern crate matrixmultiply;
pub use matrixmultiply::sgemm;
pub use matrixmultiply::dgemm;
#[macro_use]
extern crate bencher;
// To express a benchmark result in GFlop/s, use
//   flop/s = 2 * M * N * K / time
// where M, N and K are the three matrix dimensions of the product.
// Generates a module `$group` containing one public benchmark function per
// `(name, rows, inner, cols)` tuple, then registers all of them as a bencher
// benchmark group that is also named `$group`.
//
// Every generated benchmark multiplies a zero-filled `rows x inner` matrix by
// a zero-filled `inner x cols` matrix into a `rows x cols` output, all stored
// contiguously in row-major order, by calling `$kernel` (which must be
// importable from the crate root).
//
// The dimension expressions are substituted textually on purpose: the same
// literal is used both as a `usize` dimension and as an `isize` stride.
macro_rules! mat_mul {
    ($group:ident, $kernel:ident, $(($bench_fn:ident, $rows:expr, $inner:expr, $cols:expr))+) => {
        mod $group {
            use bencher::Bencher;
            use $kernel;
            $(
                pub fn $bench_fn(bench: &mut Bencher) {
                    let lhs = vec![0.; $rows * $inner];
                    let rhs = vec![0.; $inner * $cols];
                    let mut out = vec![0.; $rows * $cols];
                    // SAFETY: each buffer holds exactly rows*inner, inner*cols
                    // and rows*cols elements respectively, and the row strides
                    // passed below (inner, cols, cols) with unit column stride
                    // describe those same dense row-major layouts, so the
                    // kernel stays inside the allocations (assuming the
                    // matrixmultiply gemm argument order m, k, n).
                    bench.iter(|| unsafe {
                        $kernel(
                            $rows, $inner, $cols,
                            1.,
                            lhs.as_ptr(), $inner, 1,
                            rhs.as_ptr(), $cols, 1,
                            0.,
                            out.as_mut_ptr(), $cols, 1,
                        )
                    });
                }
            )+
        }
        benchmark_group!{ $group, $($group::$bench_fn),+ }
    };
}
// Benchmark entry point: hands the three benchmark groups defined in this
// file (f32 gemm, f64 gemm, and the f32 reference implementation) to bencher.
benchmark_main!(mat_mul_f32, mat_mul_f64, ref_mat_mul_f32);
// Single-precision (`sgemm`) benchmarks.
// Each tuple is (benchmark name, rows of A, shared inner dimension,
// columns of B), in the order the `mat_mul!` macro consumes them.
mat_mul!{mat_mul_f32, sgemm,
(m004, 4, 4, 4)
(m005, 5, 5, 5)
(m006, 6, 6, 6)
(m007, 7, 7, 7)
(m008, 8, 8, 8)
(m009, 9, 9, 9)
(m012, 12, 12, 12)
(m016, 16, 16, 16)
(m032, 32, 32, 32)
(m064, 64, 64, 64)
(m127, 127, 127, 127)
(m256, 256, 256, 256)
(m512, 512, 512, 512)
// NOTE(review): the name says 16x4 but the dimensions are 32, 4, 32 --
// confirm which one is intended.
(mix16x4, 32, 4, 32)
(mix32x2, 32, 2, 32)
(mix97, 97, 97, 125)
(mix128x10000x128, 128, 10000, 128)
}
// Double-precision (`dgemm`) benchmarks.
// Each tuple is (benchmark name, rows of A, shared inner dimension,
// columns of B), in the order the `mat_mul!` macro consumes them.
mat_mul!{mat_mul_f64, dgemm,
(m004, 4, 4, 4)
(m007, 7, 7, 7)
(m008, 8, 8, 8)
(m012, 12, 12, 12)
(m016, 16, 16, 16)
(m032, 32, 32, 32)
(m064, 64, 64, 64)
(m127, 127, 127, 127)
(m256, 256, 256, 256)
(m512, 512, 512, 512)
// NOTE(review): the name says 16x4 but the dimensions are 32, 4, 32 --
// confirm which one is intended.
(mix16x4, 32, 4, 32)
(mix32x2, 32, 2, 32)
(mix97, 97, 97, 125)
(mix128x10000x128, 128, 10000, 128)
}
use std::ops::{Add, Mul};
/// Additive identity for the element types accepted by
/// `reference_mat_mul`.
trait Z {
    /// Returns the zero value of the implementing type.
    fn zero() -> Self;
}
impl Z for f32 { fn zero() -> Self { 0. } }
impl Z for f64 { fn zero() -> Self { 0. } }

/// Simple, slow reference matrix multiplication: `c = a * b`.
///
/// All three matrices are dense and row-major:
/// `a` is `m x k`, `b` is `k x n` and `c` is `m x n`. Every element of the
/// `m x n` output is overwritten; any previous contents of `c` are ignored.
///
/// # Panics
///
/// Panics if any slice is shorter than its matrix requires
/// (`m * k`, `k * n` and `m * n` elements respectively).
#[inline(never)]
fn reference_mat_mul<A>(m: usize, k: usize, n: usize, a: &[A], b: &[A], c: &mut [A])
    where A: Z + Add<Output=A> + Mul<Output=A> + Copy,
{
    assert!(a.len() >= m * k);
    assert!(b.len() >= k * n);
    assert!(c.len() >= m * n);
    for i in 0..m {
        for j in 0..n {
            // SAFETY: with i < m, j < n and x < k we have
            //   i * n + j < m * n <= c.len(),
            //   i * k + x < m * k <= a.len(),
            //   x * n + j < k * n <= b.len(),
            // as guaranteed by the assertions above.
            unsafe {
                // `c` has `n` columns, so its row stride is `n` (not `m`).
                let celt = c.get_unchecked_mut(i * n + j);
                *celt = (0..k).fold(A::zero(),
                    move |s, x| s + *a.get_unchecked(i * k + x) * *b.get_unchecked(x * n + j));
            }
        }
    }
}
// Generates a module `$group` containing one public benchmark function per
// `(name, rows, inner, cols)` tuple, then registers all of them as a bencher
// benchmark group that is also named `$group`.
//
// Every generated benchmark runs `reference_mat_mul` on zero-filled buffers
// of element type `$elem`: a `rows x inner` left operand, an `inner x cols`
// right operand and a `rows x cols` output. The closure yields the first
// output element as the value of each timed iteration.
macro_rules! ref_mat_mul {
    ($group:ident, $elem:ty, $(($bench_fn:ident, $rows:expr, $inner:expr, $cols:expr))+) => {
        mod $group {
            use bencher::Bencher;
            use super::reference_mat_mul;
            $(
                pub fn $bench_fn(bench: &mut Bencher) {
                    // Only the first buffer names the element type; the other
                    // two are inferred from the call below.
                    let lhs = vec![0. as $elem; $rows * $inner];
                    let rhs = vec![0.; $inner * $cols];
                    let mut out = vec![0.; $rows * $cols];
                    bench.iter(|| {
                        reference_mat_mul($rows, $inner, $cols, &lhs, &rhs, &mut out);
                        out[0]
                    });
                }
            )+
        }
        benchmark_group!{ $group, $($group::$bench_fn),+ }
    };
}
// Baseline benchmarks of the plain reference implementation on f32, for
// square sizes only. Each tuple is (benchmark name, rows of A, shared inner
// dimension, columns of B), in the order the `ref_mat_mul!` macro consumes
// them.
ref_mat_mul!{ref_mat_mul_f32, f32,
(m004, 4, 4, 4)
(m005, 5, 5, 5)
(m006, 6, 6, 6)
(m007, 7, 7, 7)
(m008, 8, 8, 8)
(m009, 9, 9, 9)
(m012, 12, 12, 12)
(m016, 16, 16, 16)
(m032, 32, 32, 32)
(m064, 64, 64, 64)
}