rayon/rayon-demo/src/sieve/mod.rs - incubator-teaclave-crates - Git at Google

 const USAGE: &str = "
 Usage: sieve bench
        sieve --help

 Sieve of Eratosthenes

 A sieve finds prime numbers by first assuming all candidates are prime,
 then progressively using small primes to mark their multiples composite.

 Note that a given prime p only needs to start marking its multiples >=p²,
 as anything less will already have been marked by an even smaller prime
 factor.  This also means that sieving up to N only requires primes up to
 sqrt(N) - a useful fact to prepare for parallel sieving.

 Here we have three forms of the sieve:

 - `sieve_serial`: direct iteration in serial
 - `sieve_chunks`: chunked iteration, still serial
 - `sieve_parallel`: chunked iteration in parallel

 The performance improvement of `sieve_chunks` is largely due to cache
 locality.  Since the sieve has to sweep repeatedly over the same memory,
 it's helpful that the chunk size can fit entirely in the CPU cache.  Then
 `sieve_parallel` also benefits from this as long as there's cache room for
 multiple chunks, for the separate jobs in each thread.

 Commands:
     bench              Run the benchmark in different modes and print the timings.

 Options:
     -h, --help         Show this message.
 ";

 #[derive(serde::Deserialize)]
 pub struct Args {
     cmd_bench: bool,
 }

 #[cfg(test)]
 mod bench;

 use docopt::Docopt;
 use rayon::prelude::*;
 use std::time::{Duration, Instant};

 const CHUNK_SIZE: usize = 100_000;

 // Number of Primes < 10^n
 // https://oeis.org/A006880
 const NUM_PRIMES: &[usize] = &[
     0,  // primes in 0..10^0
     4,  // primes in 0..10^1
     25, // etc
     168,
     1_229,
     9_592,
     78_498,
     664_579,
     5_761_455,
     50_847_534,
     455_052_511,
     4_118_054_813,
     37_607_912_018,
     346_065_536_839,
     3_204_941_750_802,
     29_844_570_422_669,
     279_238_341_033_925,
     2_623_557_157_654_233,
     24_739_954_287_740_860,
     234_057_667_276_344_607,
     2_220_819_602_560_918_840,
 ];

 // For all of these sieves, sieve[i]==true -> 2*i+1 is prime

 fn max(magnitude: usize) -> usize {
     10_usize.pow(magnitude as u32)
 }

 /// Sieve odd integers for primes < max.
 fn sieve_serial(max: usize) -> Vec<bool> {
     let mut sieve = vec![true; max / 2];
     sieve[0] = false; // 1 is not prime
     for i in 1.. {
         if sieve[i] {
             let p = 2 * i + 1;
             let pp = p * p;
             if pp >= max {
                 break;
             }
             clear_stride(&mut sieve, pp / 2, p);
         }
     }
     sieve
 }

 /// Sieve odd integers for primes < max using chunks.
 fn sieve_chunks(max: usize) -> Vec<bool> {
     // first compute the small primes, up to sqrt(max).
     let small_max = (max as f64).sqrt().ceil() as usize;
     let mut sieve = sieve_serial(small_max);
     sieve.resize(max / 2, true);

     {
         let (low, high) = sieve.split_at_mut(small_max / 2);
         for (chunk_index, chunk) in high.chunks_mut(CHUNK_SIZE).enumerate() {
             let i = small_max / 2 + chunk_index * CHUNK_SIZE;
             let base = i * 2 + 1;
             update_chunk(low, chunk, base);
         }
     }

     sieve
 }

 /// Sieve odd integers for primes < max, in parallel!
 fn sieve_parallel(max: usize) -> Vec<bool> {
     // first compute the small primes, up to sqrt(max).
     let small_max = (max as f64).sqrt().ceil() as usize;
     let mut sieve = sieve_serial(small_max);
     sieve.resize(max / 2, true);

     {
         let (low, high) = sieve.split_at_mut(small_max / 2);
         high.par_chunks_mut(CHUNK_SIZE)
             .enumerate() // to figure out where this chunk came from
             .with_max_len(1) // ensure every single chunk is a separate rayon job
             .for_each(|(chunk_index, chunk)| {
                 let i = small_max / 2 + chunk_index * CHUNK_SIZE;
                 let base = i * 2 + 1;
                 update_chunk(low, chunk, base);
             });
     }

     sieve
 }

 /// Update a chunk with low primes
 fn update_chunk(low: &[bool], chunk: &mut [bool], base: usize) {
     let max = base + chunk.len() * 2;
     for (i, &is_prime) in low.iter().enumerate() {
         if is_prime {
             let p = 2 * i + 1;
             let pp = p * p;
             if pp >= max {
                 break;
             }

             let pm = if pp < base {
                 // p² is too small - find the first odd multiple that's in range
                 (((base + p - 1) / p) | 1) * p
             } else {
                 pp
             };

             if pm < max {
                 clear_stride(chunk, (pm - base) / 2, p);
             }
         }
     }
 }

 fn clear_stride(slice: &mut [bool], from: usize, stride: usize) {
     slice[from..]
         .iter_mut()
         .step_by(stride)
         .for_each(|x| *x = false)
 }

 fn measure(f: fn(usize) -> Vec<bool>) -> Duration {
     const MAGNITUDE: usize = 9;

     let start = Instant::now();
     let sieve = f(max(MAGNITUDE));
     let duration = start.elapsed();

     // sanity check the number of primes found
     let num_primes = 1 + sieve.into_iter().filter(|&b| b).count();
     assert_eq!(num_primes, NUM_PRIMES[MAGNITUDE]);

     duration
 }

 pub fn main(args: &[String]) {
     let args: Args = Docopt::new(USAGE)
         .and_then(|d| d.argv(args).deserialize())
         .unwrap_or_else(|e| e.exit());

     if args.cmd_bench {
         let serial = measure(sieve_serial).as_nanos();
         println!("  serial: {:10} ns", serial);

         let chunks = measure(sieve_chunks).as_nanos();
         println!(
             "  chunks: {:10} ns -> {:.2}x speedup",
             chunks,
             serial as f64 / chunks as f64
         );

         let parallel = measure(sieve_parallel).as_nanos();
         println!(
             "parallel: {:10} ns -> {:.2}x speedup",
             parallel,
             chunks as f64 / parallel as f64
         );
     }
 }
	const USAGE: &str = "
	Usage: sieve bench
	sieve --help

	Sieve of Eratosthenes

	A sieve finds prime numbers by first assuming all candidates are prime,
	then progressively using small primes to mark their multiples composite.

	Note that a given prime p only needs to start marking its multiples >=p²,
	as anything less will already have been marked by an even smaller prime
	factor. This also means that sieving up to N only requires primes up to
	sqrt(N) - a useful fact to prepare for parallel sieving.

	Here we have three forms of the sieve:

	- `sieve_serial`: direct iteration in serial
	- `sieve_chunks`: chunked iteration, still serial
	- `sieve_parallel`: chunked iteration in parallel

	The performance improvement of `sieve_chunks` is largely due to cache
	locality. Since the sieve has to sweep repeatedly over the same memory,
	it's helpful that the chunk size can fit entirely in the CPU cache. Then
	`sieve_parallel` also benefits from this as long as there's cache room for
	multiple chunks, for the separate jobs in each thread.

	Commands:
	bench Run the benchmark in different modes and print the timings.

	Options:
	-h, --help Show this message.
	";

	#[derive(serde::Deserialize)]
	pub struct Args {
	cmd_bench: bool,
	}

	#[cfg(test)]
	mod bench;

	use docopt::Docopt;
	use rayon::prelude::*;
	use std::time::{Duration, Instant};

	const CHUNK_SIZE: usize = 100_000;

	// Number of Primes < 10^n
	// https://oeis.org/A006880
	const NUM_PRIMES: &[usize] = &[
	0, // primes in 0..10^0
	4, // primes in 0..10^1
	25, // etc
	168,
	1_229,
	9_592,
	78_498,
	664_579,
	5_761_455,
	50_847_534,
	455_052_511,
	4_118_054_813,
	37_607_912_018,
	346_065_536_839,
	3_204_941_750_802,
	29_844_570_422_669,
	279_238_341_033_925,
	2_623_557_157_654_233,
	24_739_954_287_740_860,
	234_057_667_276_344_607,
	2_220_819_602_560_918_840,
	];

	// For all of these sieves, sieve[i]==true -> 2*i+1 is prime

	fn max(magnitude: usize) -> usize {
	10_usize.pow(magnitude as u32)
	}

	/// Sieve odd integers for primes < max.
	fn sieve_serial(max: usize) -> Vec<bool> {
	let mut sieve = vec![true; max / 2];
	sieve[0] = false; // 1 is not prime
	for i in 1.. {
	if sieve[i] {
	let p = 2 * i + 1;
	let pp = p * p;
	if pp >= max {
	break;
	}
	clear_stride(&mut sieve, pp / 2, p);
	}
	}
	sieve
	}

	/// Sieve odd integers for primes < max using chunks.
	fn sieve_chunks(max: usize) -> Vec<bool> {
	// first compute the small primes, up to sqrt(max).
	let small_max = (max as f64).sqrt().ceil() as usize;
	let mut sieve = sieve_serial(small_max);
	sieve.resize(max / 2, true);

	{
	let (low, high) = sieve.split_at_mut(small_max / 2);
	for (chunk_index, chunk) in high.chunks_mut(CHUNK_SIZE).enumerate() {
	let i = small_max / 2 + chunk_index * CHUNK_SIZE;
	let base = i * 2 + 1;
	update_chunk(low, chunk, base);
	}
	}

	sieve
	}

	/// Sieve odd integers for primes < max, in parallel!
	fn sieve_parallel(max: usize) -> Vec<bool> {
	// first compute the small primes, up to sqrt(max).
	let small_max = (max as f64).sqrt().ceil() as usize;
	let mut sieve = sieve_serial(small_max);
	sieve.resize(max / 2, true);

	{
	let (low, high) = sieve.split_at_mut(small_max / 2);
	high.par_chunks_mut(CHUNK_SIZE)
	.enumerate() // to figure out where this chunk came from
	.with_max_len(1) // ensure every single chunk is a separate rayon job
	.for_each(\|(chunk_index, chunk)\| {
	let i = small_max / 2 + chunk_index * CHUNK_SIZE;
	let base = i * 2 + 1;
	update_chunk(low, chunk, base);
	});
	}

	sieve
	}

	/// Update a chunk with low primes
	fn update_chunk(low: &[bool], chunk: &mut [bool], base: usize) {
	let max = base + chunk.len() * 2;
	for (i, &is_prime) in low.iter().enumerate() {
	if is_prime {
	let p = 2 * i + 1;
	let pp = p * p;
	if pp >= max {
	break;
	}

	let pm = if pp < base {
	// p² is too small - find the first odd multiple that's in range
	(((base + p - 1) / p) \| 1) * p
	} else {
	pp
	};

	if pm < max {
	clear_stride(chunk, (pm - base) / 2, p);
	}
	}
	}
	}

	fn clear_stride(slice: &mut [bool], from: usize, stride: usize) {
	slice[from..]
	.iter_mut()
	.step_by(stride)
	.for_each(\|x\| *x = false)
	}

	fn measure(f: fn(usize) -> Vec<bool>) -> Duration {
	const MAGNITUDE: usize = 9;

	let start = Instant::now();
	let sieve = f(max(MAGNITUDE));
	let duration = start.elapsed();

	// sanity check the number of primes found
	let num_primes = 1 + sieve.into_iter().filter(\|&b\| b).count();
	assert_eq!(num_primes, NUM_PRIMES[MAGNITUDE]);

	duration
	}

	pub fn main(args: &[String]) {
	let args: Args = Docopt::new(USAGE)
	.and_then(\|d\| d.argv(args).deserialize())
	.unwrap_or_else(\|e\| e.exit());

	if args.cmd_bench {
	let serial = measure(sieve_serial).as_nanos();
	println!(" serial: {:10} ns", serial);

	let chunks = measure(sieve_chunks).as_nanos();
	println!(
	" chunks: {:10} ns -> {:.2}x speedup",
	chunks,
	serial as f64 / chunks as f64
	);

	let parallel = measure(sieve_parallel).as_nanos();
	println!(
	"parallel: {:10} ns -> {:.2}x speedup",
	parallel,
	chunks as f64 / parallel as f64
	);
	}
	}