src/kudu/util/random.h - kudu - Git at Google

 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file. See the AUTHORS file for names of contributors.
 #ifndef KUDU_UTIL_RANDOM_H_
 #define KUDU_UTIL_RANDOM_H_

 #include <cmath>
 #include <cstdint>
 #include <mutex>
 #include <random>
 #include <vector>

 #include "kudu/gutil/map-util.h"
 #include "kudu/util/locks.h"

 namespace kudu {

 namespace random_internal {

 static const uint32_t M = 2147483647L;   // 2^31-1
 const double kTwoPi = 6.283185307179586476925286;

 } // namespace random_internal

 template<class R>
 class StdUniformRNG;

 // A very simple random number generator.  Not especially good at
 // generating truly random bits, but good enough for our needs in this
 // package. This implementation is not thread-safe.
 class Random {
  private:
   uint32_t seed_;
  public:
   explicit Random(uint32_t s) {
     Reset(s);
   }

   // Reset the RNG to the given seed value.
   void Reset(uint32_t s) {
     seed_ = s & 0x7fffffffu;
     // Avoid bad seeds.
     if (seed_ == 0 || seed_ == random_internal::M) {
       seed_ = 1;
     }
   }

   // Next pseudo-random 32-bit unsigned integer.
   // FIXME: This currently only generates 31 bits of randomness.
   // The MSB will always be zero.
   uint32_t Next() {
     static const uint64_t A = 16807;  // bits 14, 8, 7, 5, 2, 1, 0
     // We are computing
     //       seed_ = (seed_ * A) % M,    where M = 2^31-1
     //
     // seed_ must not be zero or M, or else all subsequent computed values
     // will be zero or M respectively.  For all other values, seed_ will end
     // up cycling through every number in [1,M-1]
     uint64_t product = seed_ * A;

     // Compute (product % M) using the fact that ((x << 31) % M) == x.
     seed_ = static_cast<uint32_t>((product >> 31) + (product & random_internal::M));
     // The first reduction may overflow by 1 bit, so we may need to
     // repeat.  mod == M is not possible; using > allows the faster
     // sign-bit-based test.
     if (seed_ > random_internal::M) {
       seed_ -= random_internal::M;
     }
     return seed_;
   }

   // Alias for consistency with Next64
   uint32_t Next32() { return Next(); }

   // Next pseudo-random 64-bit unsigned integer.
   // FIXME: This currently only generates 62 bits of randomness due to Next()
   // only giving 31 bits of randomness. The 2 most significant bits will always
   // be zero.
   uint64_t Next64() {
     uint64_t large = Next();
     // Only shift by 31 bits so we end up with zeros in MSB and not scattered
     // throughout the 64-bit word. This is due to the weakness in Next() noted
     // above.
     large <<= 31;
     large |= Next();
     return large;
   }

   // Returns a uniformly distributed value in the range [0..n-1]
   // REQUIRES: n > 0
   uint32_t Uniform(uint32_t n) { return Next() % n; }

   // Alias for consistency with Uniform64
   uint32_t Uniform32(uint32_t n) { return Uniform(n); }

   // Returns a uniformly distributed 64-bit value in the range [0..n-1]
   // REQUIRES: n > 0
   uint64_t Uniform64(uint64_t n) { return Next64() % n; }

   // Randomly returns true ~"1/n" of the time, and false otherwise.
   // REQUIRES: n > 0
   bool OneIn(int n) { return (Next() % n) == 0; }

   // Skewed: pick "base" uniformly from range [0,max_log] and then
   // return "base" random bits.  The effect is to pick a number in the
   // range [0,2^max_log-1] with exponential bias towards smaller numbers.
   uint32_t Skewed(int max_log) {
     return Uniform(1 << Uniform(max_log + 1));
   }

   // Samples a random number from the given normal distribution.
   double Normal(double mean, double std_dev);

   // Return a random number between 0.0 and 1.0 inclusive.
   double NextDoubleFraction() {
     return Next() / static_cast<double>(random_internal::M + 1.0);
   }

   // Sample 'k' random elements from the collection 'c' into 'result', taking care not to sample any
   // elements that are already present in 'avoid'.
   //
   // In the case that 'c' has fewer than 'k' elements then all elements in 'c' will be selected.
   //
   // 'c' should be an iterable STL collection such as a vector, set, or list.
   // 'avoid' should be an STL-compatible set.
   //
   // The results are not stored in a randomized order: the order of results will
   // match their order in the input collection.
   template<class Collection, class Set, class T>
   void ReservoirSample(const Collection& c, int k, const Set& avoid,
                        std::vector<T>* result) {
     result->clear();
     result->reserve(k);
     int i = 0;
     for (const T& elem : c) {
       if (ContainsKey(avoid, elem)) {
         continue;
       }
       i++;
       // Fill the reservoir if there is available space.
       if (result->size() < k) {
         result->push_back(elem);
         continue;
       }
       // Otherwise replace existing elements with decreasing probability.
       int j = Uniform(i);
       if (j < k) {
         (*result)[j] = elem;
       }
     }
   }
 };

 // Thread-safe wrapper around Random.
 class ThreadSafeRandom {
  public:
   explicit ThreadSafeRandom(uint32_t s)
       : random_(s) {
   }

   void Reset(uint32_t s) {
     std::lock_guard<simple_spinlock> l(lock_);
     random_.Reset(s);
   }

   uint32_t Next() {
     std::lock_guard<simple_spinlock> l(lock_);
     return random_.Next();
   }

   uint32_t Next32() {
     std::lock_guard<simple_spinlock> l(lock_);
     return random_.Next32();
   }

   uint64_t Next64() {
     std::lock_guard<simple_spinlock> l(lock_);
     return random_.Next64();
   }

   uint32_t Uniform(uint32_t n) {
     std::lock_guard<simple_spinlock> l(lock_);
     return random_.Uniform(n);
   }

   uint32_t Uniform32(uint32_t n) {
     std::lock_guard<simple_spinlock> l(lock_);
     return random_.Uniform32(n);
   }

   uint64_t Uniform64(uint64_t n) {
     std::lock_guard<simple_spinlock> l(lock_);
     return random_.Uniform64(n);
   }

   bool OneIn(int n) {
     std::lock_guard<simple_spinlock> l(lock_);
     return random_.OneIn(n);
   }

   uint32_t Skewed(int max_log) {
     std::lock_guard<simple_spinlock> l(lock_);
     return random_.Skewed(max_log);
   }

   double Normal(double mean, double std_dev) {
     std::lock_guard<simple_spinlock> l(lock_);
     return random_.Normal(mean, std_dev);
   }

   template<class Collection, class Set, class T>
   void ReservoirSample(const Collection& c, int k, const Set& avoid,
                        std::vector<T>* result) {
     std::lock_guard<simple_spinlock> l(lock_);
     random_.ReservoirSample(c, k, avoid, result);
   }

  private:
   simple_spinlock lock_;
   Random random_;
 };

 // Wraps either Random or ThreadSafeRandom as a C++ standard library
 // compliant UniformRandomNumberGenerator:
 //   http://en.cppreference.com/w/cpp/concept/UniformRandomNumberGenerator
 template<class R>
 class StdUniformRNG {
  public:
   typedef uint32_t result_type;

   explicit StdUniformRNG(R* r) : r_(r) {}
   uint32_t operator()() {
     return r_->Next32();
   }
   constexpr static uint32_t min() { return 0; }
   constexpr static uint32_t max() { return (1L << 31) - 1; }

  private:
   R* r_;
 };

 // Defined outside the class to make use of StdUniformRNG above.
 inline double Random::Normal(double mean, double std_dev) {
   std::normal_distribution<> nd(mean, std_dev);
   StdUniformRNG<Random> gen(this);
   return nd(gen);
 }

 }  // namespace kudu

 #endif  // KUDU_UTIL_RANDOM_H_
	// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
	// Use of this source code is governed by a BSD-style license that can be
	// found in the LICENSE file. See the AUTHORS file for names of contributors.
	#ifndef KUDU_UTIL_RANDOM_H_
	#define KUDU_UTIL_RANDOM_H_

	#include <cmath>
	#include <cstdint>
	#include <mutex>
	#include <random>
	#include <vector>

	#include "kudu/gutil/map-util.h"
	#include "kudu/util/locks.h"

	namespace kudu {

	namespace random_internal {

	static const uint32_t M = 2147483647L; // 2^31-1
	const double kTwoPi = 6.283185307179586476925286;

	} // namespace random_internal

	template<class R>
	class StdUniformRNG;

	// A very simple random number generator. Not especially good at
	// generating truly random bits, but good enough for our needs in this
	// package. This implementation is not thread-safe.
	class Random {
	private:
	uint32_t seed_;
	public:
	explicit Random(uint32_t s) {
	Reset(s);
	}

	// Reset the RNG to the given seed value.
	void Reset(uint32_t s) {
	seed_ = s & 0x7fffffffu;
	// Avoid bad seeds.
	if (seed_ == 0 \|\| seed_ == random_internal::M) {
	seed_ = 1;
	}
	}

	// Next pseudo-random 32-bit unsigned integer.
	// FIXME: This currently only generates 31 bits of randomness.
	// The MSB will always be zero.
	uint32_t Next() {
	static const uint64_t A = 16807; // bits 14, 8, 7, 5, 2, 1, 0
	// We are computing
	// seed_ = (seed_ * A) % M, where M = 2^31-1
	//
	// seed_ must not be zero or M, or else all subsequent computed values
	// will be zero or M respectively. For all other values, seed_ will end
	// up cycling through every number in [1,M-1]
	uint64_t product = seed_ * A;

	// Compute (product % M) using the fact that ((x << 31) % M) == x.
	seed_ = static_cast<uint32_t>((product >> 31) + (product & random_internal::M));
	// The first reduction may overflow by 1 bit, so we may need to
	// repeat. mod == M is not possible; using > allows the faster
	// sign-bit-based test.
	if (seed_ > random_internal::M) {
	seed_ -= random_internal::M;
	}
	return seed_;
	}

	// Alias for consistency with Next64
	uint32_t Next32() { return Next(); }

	// Next pseudo-random 64-bit unsigned integer.
	// FIXME: This currently only generates 62 bits of randomness due to Next()
	// only giving 31 bits of randomness. The 2 most significant bits will always
	// be zero.
	uint64_t Next64() {
	uint64_t large = Next();
	// Only shift by 31 bits so we end up with zeros in MSB and not scattered
	// throughout the 64-bit word. This is due to the weakness in Next() noted
	// above.
	large <<= 31;
	large \|= Next();
	return large;
	}

	// Returns a uniformly distributed value in the range [0..n-1]
	// REQUIRES: n > 0
	uint32_t Uniform(uint32_t n) { return Next() % n; }

	// Alias for consistency with Uniform64
	uint32_t Uniform32(uint32_t n) { return Uniform(n); }

	// Returns a uniformly distributed 64-bit value in the range [0..n-1]
	// REQUIRES: n > 0
	uint64_t Uniform64(uint64_t n) { return Next64() % n; }

	// Randomly returns true ~"1/n" of the time, and false otherwise.
	// REQUIRES: n > 0
	bool OneIn(int n) { return (Next() % n) == 0; }

	// Skewed: pick "base" uniformly from range [0,max_log] and then
	// return "base" random bits. The effect is to pick a number in the
	// range [0,2^max_log-1] with exponential bias towards smaller numbers.
	uint32_t Skewed(int max_log) {
	return Uniform(1 << Uniform(max_log + 1));
	}

	// Samples a random number from the given normal distribution.
	double Normal(double mean, double std_dev);

	// Return a random number between 0.0 and 1.0 inclusive.
	double NextDoubleFraction() {
	return Next() / static_cast<double>(random_internal::M + 1.0);
	}

	// Sample 'k' random elements from the collection 'c' into 'result', taking care not to sample any
	// elements that are already present in 'avoid'.
	//
	// In the case that 'c' has fewer than 'k' elements then all elements in 'c' will be selected.
	//
	// 'c' should be an iterable STL collection such as a vector, set, or list.
	// 'avoid' should be an STL-compatible set.
	//
	// The results are not stored in a randomized order: the order of results will
	// match their order in the input collection.
	template<class Collection, class Set, class T>
	void ReservoirSample(const Collection& c, int k, const Set& avoid,
	std::vector<T>* result) {
	result->clear();
	result->reserve(k);
	int i = 0;
	for (const T& elem : c) {
	if (ContainsKey(avoid, elem)) {
	continue;
	}
	i++;
	// Fill the reservoir if there is available space.
	if (result->size() < k) {
	result->push_back(elem);
	continue;
	}
	// Otherwise replace existing elements with decreasing probability.
	int j = Uniform(i);
	if (j < k) {
	(*result)[j] = elem;
	}
	}
	}
	};

	// Thread-safe wrapper around Random.
	class ThreadSafeRandom {
	public:
	explicit ThreadSafeRandom(uint32_t s)
	: random_(s) {
	}

	void Reset(uint32_t s) {
	std::lock_guard<simple_spinlock> l(lock_);
	random_.Reset(s);
	}

	uint32_t Next() {
	std::lock_guard<simple_spinlock> l(lock_);
	return random_.Next();
	}

	uint32_t Next32() {
	std::lock_guard<simple_spinlock> l(lock_);
	return random_.Next32();
	}

	uint64_t Next64() {
	std::lock_guard<simple_spinlock> l(lock_);
	return random_.Next64();
	}

	uint32_t Uniform(uint32_t n) {
	std::lock_guard<simple_spinlock> l(lock_);
	return random_.Uniform(n);
	}

	uint32_t Uniform32(uint32_t n) {
	std::lock_guard<simple_spinlock> l(lock_);
	return random_.Uniform32(n);
	}

	uint64_t Uniform64(uint64_t n) {
	std::lock_guard<simple_spinlock> l(lock_);
	return random_.Uniform64(n);
	}

	bool OneIn(int n) {
	std::lock_guard<simple_spinlock> l(lock_);
	return random_.OneIn(n);
	}

	uint32_t Skewed(int max_log) {
	std::lock_guard<simple_spinlock> l(lock_);
	return random_.Skewed(max_log);
	}

	double Normal(double mean, double std_dev) {
	std::lock_guard<simple_spinlock> l(lock_);
	return random_.Normal(mean, std_dev);
	}

	template<class Collection, class Set, class T>
	void ReservoirSample(const Collection& c, int k, const Set& avoid,
	std::vector<T>* result) {
	std::lock_guard<simple_spinlock> l(lock_);
	random_.ReservoirSample(c, k, avoid, result);
	}

	private:
	simple_spinlock lock_;
	Random random_;
	};

	// Wraps either Random or ThreadSafeRandom as a C++ standard library
	// compliant UniformRandomNumberGenerator:
	// http://en.cppreference.com/w/cpp/concept/UniformRandomNumberGenerator
	template<class R>
	class StdUniformRNG {
	public:
	typedef uint32_t result_type;

	explicit StdUniformRNG(R* r) : r_(r) {}
	uint32_t operator()() {
	return r_->Next32();
	}
	constexpr static uint32_t min() { return 0; }
	constexpr static uint32_t max() { return (1L << 31) - 1; }

	private:
	R* r_;
	};

	// Defined outside the class to make use of StdUniformRNG above.
	inline double Random::Normal(double mean, double std_dev) {
	std::normal_distribution<> nd(mean, std_dev);
	StdUniformRNG<Random> gen(this);
	return nd(gen);
	}

	} // namespace kudu

	#endif // KUDU_UTIL_RANDOM_H_