| /* |
| * PCG Random Number Generation for C++ |
| * |
| * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org> |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| * |
| * For additional information about the PCG random number generation scheme, |
| * including its license and other licensing options, visit |
| * |
| * http://www.pcg-random.org |
| */ |
| |
| /* |
| * This file provides support code that is useful for random-number generation |
| * but not specific to the PCG generation scheme, including: |
| * - 128-bit int support for platforms where it isn't available natively |
| * - bit twiddling operations |
| * - I/O of 128-bit and 8-bit integers |
| * - Handling the evilness of SeedSeq |
| * - Support for efficiently producing random numbers less than a given |
| * bound |
| */ |
| |
| #ifndef PCG_EXTRAS_HPP_INCLUDED |
| #define PCG_EXTRAS_HPP_INCLUDED 1 |
// Avoids the warning: 'PCG_USE_ZEROCHECK_ROTATE_IDIOM' is not defined, evaluates to 0
| #ifndef PCG_USE_ZEROCHECK_ROTATE_IDIOM |
| #define PCG_USE_ZEROCHECK_ROTATE_IDIOM 0 |
| #endif // PCG_USE_ZEROCHECK_ROTATE_IDIOM |
// Avoids the warning: 'PCG_USE_INLINE_ASM' is not defined, evaluates to 0
| #ifndef PCG_USE_INLINE_ASM |
| #define PCG_USE_INLINE_ASM 0 |
| #endif // PCG_USE_INLINE_ASM |
| |
#include <cassert>
#include <cinttypes>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <iterator>
#include <limits>
#include <locale>
#include <memory>
#include <type_traits>
#include <typeinfo>
#include <utility>
| |
| #ifdef __GNUC__ |
| #include <cxxabi.h> |
| #endif |
| |
| /* |
| * Abstractions for compiler-specific directives |
| */ |
| |
| #ifdef __GNUC__ |
| #define PCG_NOINLINE __attribute__((noinline)) |
| #else |
| #define PCG_NOINLINE |
| #endif |
| |
| /* |
| * Some members of the PCG library use 128-bit math. When compiling on 64-bit |
| * platforms, both GCC and Clang provide 128-bit integer types that are ideal |
| * for the job. |
| * |
| * On 32-bit platforms (or with other compilers), we fall back to a C++ |
| * class that provides 128-bit unsigned integers instead. It may seem |
| * like we're reinventing the wheel here, because libraries already exist |
| * that support large integers, but most existing libraries provide a very |
| * generic multiprecision code, but here we're operating at a fixed size. |
| * Also, most other libraries are fairly heavyweight. So we use a direct |
| * implementation. Sadly, it's much slower than hand-coded assembly or |
| * direct CPU support. |
| * |
| */ |
/*
 * pcg128_t is the 128-bit unsigned integer type used by the library.
 *
 * When the compiler defines __SIZEOF_INT128__ the native __uint128_t is used;
 * otherwise the uint_x4 class template from pcg_uint128.hpp is used and
 * PCG_EMULATED_128BIT_MATH is defined so other code can detect the fallback.
 *
 * PCG_128BIT_CONSTANT(high, low) builds a pcg128_t from two 64-bit halves.
 * The macro names the type with full qualification, so it can be used outside
 * namespace pcg_extras, and (in the native case) parenthesizes `low` so that
 * an argument such as `a & b` or `c ? x : y` is evaluated as a unit.
 */
#if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__
    namespace pcg_extras {
        typedef __uint128_t pcg128_t;
    }
    #define PCG_128BIT_CONSTANT(high,low) \
            ((pcg_extras::pcg128_t(high) << 64) + (low))
#else
    #include "pcg_uint128.hpp"
    namespace pcg_extras {
        typedef pcg_extras::uint_x4<uint32_t,uint64_t> pcg128_t;
    }
    #define PCG_128BIT_CONSTANT(high,low) \
            pcg_extras::pcg128_t(high,low)
    #define PCG_EMULATED_128BIT_MATH 1
#endif
| |
| |
| namespace pcg_extras { |
| |
| /* |
| * We often need to represent a "number of bits". When used normally, these |
| * numbers are never greater than 128, so an unsigned char is plenty. |
| * If you're using a nonstandard generator of a larger size, you can set |
| * PCG_BITCOUNT_T to have it define it as a larger size. (Some compilers |
| * might produce faster code if you set it to an unsigned int.) |
| */ |
| |
// Integer type used for bit counts and shift amounts. It is uint8_t unless
// the build supplies a different type through PCG_BITCOUNT_T.
#ifdef PCG_BITCOUNT_T
    using bitcount_t = PCG_BITCOUNT_T;
#else
    using bitcount_t = uint8_t;
#endif
| |
/*
 * C++ requires us to be able to serialize RNG state by printing it to or
 * reading it from a stream.  Because we use 128-bit ints, we also need to be
 * able to print them, so here is code to do so.
 *
 * This code provides enough functionality to print 128-bit ints in decimal
 * and zero-padded in hex.  It's not a full-featured implementation.
 */
| |
| template <typename CharT, typename Traits> |
| std::basic_ostream<CharT,Traits>& |
| operator<<(std::basic_ostream<CharT,Traits>& out, pcg128_t value) |
| { |
| auto desired_base = out.flags() & out.basefield; |
| bool want_hex = desired_base == out.hex; |
| |
| if (want_hex) { |
| uint64_t highpart = uint64_t(value >> 64); |
| uint64_t lowpart = uint64_t(value); |
| auto desired_width = out.width(); |
| if (desired_width > 16) { |
| out.width(desired_width - 16); |
| } |
| if (highpart != 0 || desired_width > 16) |
| out << highpart; |
| CharT oldfill; |
| if (highpart != 0) { |
| out.width(16); |
| oldfill = out.fill('0'); |
| } |
| auto oldflags = out.setf(decltype(desired_base){}, out.showbase); |
| out << lowpart; |
| out.setf(oldflags); |
| if (highpart != 0) { |
| out.fill(oldfill); |
| } |
| return out; |
| } |
| constexpr size_t MAX_CHARS_128BIT = 40; |
| |
| char buffer[MAX_CHARS_128BIT]; |
| char* pos = buffer+sizeof(buffer); |
| *(--pos) = '\0'; |
| constexpr auto BASE = pcg128_t(10ULL); |
| do { |
| auto div = value / BASE; |
| auto mod = uint32_t(value - (div * BASE)); |
| *(--pos) = '0' + mod; |
| value = div; |
| } while(value != pcg128_t(0ULL)); |
| return out << pos; |
| } |
| |
| template <typename CharT, typename Traits> |
| std::basic_istream<CharT,Traits>& |
| operator>>(std::basic_istream<CharT,Traits>& in, pcg128_t& value) |
| { |
| typename std::basic_istream<CharT,Traits>::sentry s(in); |
| |
| if (!s) |
| return in; |
| |
| constexpr auto BASE = pcg128_t(10ULL); |
| pcg128_t current(0ULL); |
| bool did_nothing = true; |
| bool overflow = false; |
| for(;;) { |
| CharT wide_ch = in.get(); |
| if (!in.good()) |
| break; |
| auto ch = in.narrow(wide_ch, '\0'); |
| if (ch < '0' || ch > '9') { |
| in.unget(); |
| break; |
| } |
| did_nothing = false; |
| pcg128_t digit(uint32_t(ch - '0')); |
| pcg128_t timesbase = current*BASE; |
| overflow = overflow || timesbase < current; |
| current = timesbase + digit; |
| overflow = overflow || current < digit; |
| } |
| |
| if (did_nothing || overflow) { |
| in.setstate(std::ios::failbit); |
| if (overflow) |
| current = ~pcg128_t(0ULL); |
| } |
| |
| value = current; |
| |
| return in; |
| } |
| |
| /* |
| * Likewise, if people use tiny rngs, we'll be serializing uint8_t. |
| * If we just used the provided IO operators, they'd read/write chars, |
| * not ints, so we need to define our own. We *can* redefine this operator |
| * here because we're in our own namespace. |
| */ |
| |
/*
 * Writes a uint8_t as a number rather than as a character by widening it to
 * uint32_t before handing it to the stream.
 */
template <typename CharT, typename Traits>
std::basic_ostream<CharT,Traits>&
operator<<(std::basic_ostream<CharT,Traits>&out, uint8_t value)
{
    const uint32_t widened = value;
    out << widened;
    return out;
}
| |
/*
 * Reads a uint8_t as a number rather than as a character.
 *
 * The number is parsed as a uint32_t and then narrowed.  `target` is taken
 * by reference so that the parsed value actually reaches the caller.
 *
 *   - If extraction fails and the stream left the scratch variable untouched
 *     (detected via a sentinel), `target` is not modified.
 *   - If the parsed number exceeds the uint8_t range, failbit is set and
 *     `target` becomes 255.
 *   - Otherwise `target` receives the parsed number.
 */
template <typename CharT, typename Traits>
std::basic_istream<CharT,Traits>&
operator>>(std::basic_istream<CharT,Traits>& in, uint8_t& target)
{
    // Sentinel used to tell "stream wrote nothing" apart from "stream wrote
    // a value while failing".
    constexpr uint32_t UNTOUCHED = 0xdecea5edU;

    uint32_t value = UNTOUCHED;
    in >> value;
    if (!in && value == UNTOUCHED)
        return in;
    if (value > std::numeric_limits<uint8_t>::max()) {
        in.setstate(std::ios::failbit);
        value = ~0U;                    // narrows to 255 below
    }
    target = uint8_t(value);
    return in;
}
| |
| /* Unfortunately, the above functions don't get found in preference to the |
| * built in ones, so we create some more specific overloads that will. |
| * Ugh. |
| */ |
| |
| inline std::ostream& operator<<(std::ostream& out, uint8_t value) |
| { |
| return pcg_extras::operator<< <char>(out, value); |
| } |
| |
| inline std::istream& operator>>(std::istream& in, uint8_t& value) |
| { |
| return pcg_extras::operator>> <char>(in, value); |
| } |
| |
| |
| |
| /* |
| * Useful bitwise operations. |
| */ |
| |
/*
 * XorShifts are invertible, but they are something of a pain to invert.
 * This function backs them out.  It's used by the wacky "inside out"
 * generator defined later.
 */
| |
| template <typename itype> |
| inline itype unxorshift(itype x, bitcount_t bits, bitcount_t shift) |
| { |
| if (2*shift >= bits) { |
| return x ^ (x >> shift); |
| } |
| itype lowmask1 = (itype(1U) << (bits - shift*2)) - 1; |
| itype highmask1 = ~lowmask1; |
| itype top1 = x; |
| itype bottom1 = x & lowmask1; |
| top1 ^= top1 >> shift; |
| top1 &= highmask1; |
| x = top1 | bottom1; |
| itype lowmask2 = (itype(1U) << (bits - shift)) - 1; |
| itype bottom2 = x & lowmask2; |
| bottom2 = unxorshift(bottom2, bits - shift, shift); |
| bottom2 &= lowmask1; |
| return top1 | bottom2; |
| } |
| |
/*
 * Rotate left and right.
 *
 * In an ideal world, compilers would spot idiomatic rotate code and convert
 * it to a rotate instruction.  Of course, opinions vary on what the correct
 * idiom is and how to spot it.  For clang, sometimes it generates better
 * (but still crappy) code if you define PCG_USE_ZEROCHECK_ROTATE_IDIOM.
 */
| |
| template <typename itype> |
| inline itype rotl(itype value, bitcount_t rot) |
| { |
| constexpr bitcount_t bits = sizeof(itype) * 8; |
| constexpr bitcount_t mask = bits - 1; |
| #if PCG_USE_ZEROCHECK_ROTATE_IDIOM |
| return rot ? (value << rot) | (value >> (bits - rot)) : value; |
| #else |
| return (value << rot) | (value >> ((- rot) & mask)); |
| #endif |
| } |
| |
| template <typename itype> |
| inline itype rotr(itype value, bitcount_t rot) |
| { |
| constexpr bitcount_t bits = sizeof(itype) * 8; |
| constexpr bitcount_t mask = bits - 1; |
| #if PCG_USE_ZEROCHECK_ROTATE_IDIOM |
| return rot ? (value >> rot) | (value << (bits - rot)) : value; |
| #else |
| return (value >> rot) | (value << ((- rot) & mask)); |
| #endif |
| } |
| |
/* Unfortunately, both Clang and GCC sometimes perform poorly when it comes
 * to properly recognizing idiomatic rotate code, so we also provide
 * assembler directives (enabled with PCG_USE_INLINE_ASM).  Boo, hiss.
 * (I hope that these compilers get better so that this code can die.)
 *
 * These overloads will be preferred over the general template code above.
 */
#if PCG_USE_INLINE_ASM && __GNUC__ && (__x86_64__ || __i386__)

// Each overload issues the x86 rotate-right instruction of the matching
// operand size.  The value is both input and output of the instruction (the
// "0" constraint ties the input to the output operand) and the rotation
// count is supplied through the "c" register constraint.

// 8-bit rotate right (rorb).
inline uint8_t rotr(uint8_t value, bitcount_t rot)
{
    asm ("rorb   %%cl, %[val]"
         : [val] "=r" (value)
         : "0" (value), "c" (rot));
    return value;
}

// 16-bit rotate right (rorw).
inline uint16_t rotr(uint16_t value, bitcount_t rot)
{
    asm ("rorw   %%cl, %[val]"
         : [val] "=r" (value)
         : "0" (value), "c" (rot));
    return value;
}

// 32-bit rotate right (rorl).
inline uint32_t rotr(uint32_t value, bitcount_t rot)
{
    asm ("rorl   %%cl, %[val]"
         : [val] "=r" (value)
         : "0" (value), "c" (rot));
    return value;
}

#if __x86_64__
// 64-bit rotate right (rorq); only provided on x86-64.
inline uint64_t rotr(uint64_t value, bitcount_t rot)
{
    asm ("rorq   %%cl, %[val]"
         : [val] "=r" (value)
         : "0" (value), "c" (rot));
    return value;
}
#endif // __x86_64__

#endif // PCG_USE_INLINE_ASM
| |
| |
| /* |
| * The C++ SeedSeq concept (modelled by seed_seq) can fill an array of |
| * 32-bit integers with seed data, but sometimes we want to produce |
| * larger or smaller integers. |
| * |
| * The following code handles this annoyance. |
| * |
 * uneven_copy will copy an array of 32-bit ints to an array of larger or
 * smaller ints (actually, the code is general and only needs forward
 * iterators).  The copy is identical to the one that would be performed if
 * we just did memcpy on a standard little-endian machine, but works
 * regardless of the endianness of the machine (or the weirdness of the ints
 * involved).
| * |
| * generate_to initializes an array of integers using a SeedSeq |
| * object. It is given the size as a static constant at compile time and |
| * tries to avoid memory allocation. If we're filling in 32-bit constants |
| * we just do it directly. If we need a separate buffer and it's small, |
| * we allocate it on the stack. Otherwise, we fall back to heap allocation. |
| * Ugh. |
| * |
| * generate_one produces a single value of some integral type using a |
| * SeedSeq object. |
| */ |
| |
| /* uneven_copy helper, case where destination ints are less than 32 bit. */ |
| |
| template<class SrcIter, class DestIter> |
| SrcIter uneven_copy_impl( |
| SrcIter src_first, DestIter dest_first, DestIter dest_last, |
| std::true_type) |
| { |
| typedef typename std::iterator_traits<SrcIter>::value_type src_t; |
| typedef typename std::iterator_traits<DestIter>::value_type dest_t; |
| |
| constexpr bitcount_t SRC_SIZE = sizeof(src_t); |
| constexpr bitcount_t DEST_SIZE = sizeof(dest_t); |
| constexpr bitcount_t DEST_BITS = DEST_SIZE * 8; |
| constexpr bitcount_t SCALE = SRC_SIZE / DEST_SIZE; |
| |
| size_t count = 0; |
| src_t value; |
| |
| while (dest_first != dest_last) { |
| if ((count++ % SCALE) == 0) |
| value = *src_first++; // Get more bits |
| else |
| value >>= DEST_BITS; // Move down bits |
| |
| *dest_first++ = dest_t(value); // Truncates, ignores high bits. |
| } |
| return src_first; |
| } |
| |
| /* uneven_copy helper, case where destination ints are more than 32 bit. */ |
| |
/*
 * uneven_copy helper for destination elements at least as wide as the
 * source.
 *
 * Each destination element is assembled from
 * ceil(sizeof(dest)/sizeof(src)) consecutive source elements, the first one
 * supplying the lowest bits.
 *
 * Returns the source iterator advanced past the last element read.
 */
template<class SrcIter, class DestIter>
SrcIter uneven_copy_impl(
    SrcIter src_first, DestIter dest_first, DestIter dest_last,
    std::false_type)
{
    typedef typename std::iterator_traits<SrcIter>::value_type  src_t;
    typedef typename std::iterator_traits<DestIter>::value_type dest_t;

    constexpr auto SRC_BYTES      = sizeof(src_t);
    constexpr auto BITS_PER_SRC   = SRC_BYTES * 8;
    constexpr auto DEST_BYTES     = sizeof(dest_t);
    constexpr auto WORDS_PER_DEST = (DEST_BYTES+SRC_BYTES-1) / SRC_BYTES;

    for (; dest_first != dest_last; ++dest_first) {
        dest_t assembled(0UL);
        unsigned int offset = 0;

        for (size_t word = 0; word < WORDS_PER_DEST; ++word) {
            assembled |= dest_t(*src_first) << offset;
            ++src_first;
            offset += BITS_PER_SRC;
        }

        *dest_first = assembled;
    }
    return src_first;
}
| |
| /* uneven_copy, call the right code for larger vs. smaller */ |
| |
/*
 * Copies integers from a source sequence into [dest_first, dest_last) when
 * the two element types may differ in size, choosing the splitting or the
 * combining helper according to which element type is smaller.
 *
 * Returns the source iterator advanced past the last element read.
 */
template<class SrcIter, class DestIter>
inline SrcIter uneven_copy(SrcIter src_first,
                           DestIter dest_first, DestIter dest_last)
{
    typedef typename std::iterator_traits<SrcIter>::value_type  src_t;
    typedef typename std::iterator_traits<DestIter>::value_type dest_t;

    // Tag type: true_type selects the "destination is narrower" helper.
    typedef std::integral_constant<bool, (sizeof(dest_t) < sizeof(src_t))>
        dest_is_smaller;

    return uneven_copy_impl(src_first, dest_first, dest_last,
                            dest_is_smaller{});
}
| |
| /* generate_to, fill in a fixed-size array of integral type using a SeedSeq |
| * (actually works for any random-access iterator) |
| */ |
| |
/*
 * generate_to helper for the std::true_type tag: the generator fills the
 * `size` destination elements directly, with no intermediate buffer.
 */
template <size_t size, typename SeedSeq, typename DestIter>
inline void generate_to_impl(SeedSeq&& generator, DestIter dest,
                             std::true_type)
{
    DestIter dest_end = dest + size;
    generator.generate(dest, dest_end);
}
| |
/*
 * generate_to helper for the std::false_type tag: the destination elements
 * are not 32 bits wide.
 *
 * The generator first fills a temporary buffer of uint32_t, which is then
 * repacked into the `size` destination elements with uneven_copy.  Buffers
 * of up to 1024 words live on the stack; larger ones are heap-allocated and
 * released automatically, including when the generator throws.
 *
 * Throws std::bad_alloc if the heap buffer cannot be allocated.
 */
template <size_t size, typename SeedSeq, typename DestIter>
void generate_to_impl(SeedSeq&& generator, DestIter dest,
                      std::false_type)
{
    typedef typename std::iterator_traits<DestIter>::value_type dest_t;
    constexpr auto DEST_SIZE = sizeof(dest_t);
    constexpr auto GEN_SIZE  = sizeof(uint32_t);

    constexpr bool GEN_IS_SMALLER = GEN_SIZE < DEST_SIZE;
    // Number of 32-bit words needed to cover `size` destination elements.
    constexpr size_t FROM_ELEMS =
        GEN_IS_SMALLER
            ? size * ((DEST_SIZE+GEN_SIZE-1) / GEN_SIZE)
            : (size + (GEN_SIZE / DEST_SIZE) - 1)
                / ((GEN_SIZE / DEST_SIZE) + GEN_IS_SMALLER);
    //                                    this odd code ^^^^^^^^^^^^^^^^^
    //                                    is a work-around for
    //                    a bug: http://llvm.org/bugs/show_bug.cgi?id=21287

    constexpr size_t MAX_STACK_ELEMS = 1024;
    constexpr bool   USE_STACK       = FROM_ELEMS <= MAX_STACK_ELEMS;
    // Both branches below are compiled whichever one runs, so the stack
    // array must have a small, nonzero extent even when it is not used.
    constexpr size_t STACK_ELEMS =
        (USE_STACK && FROM_ELEMS != 0) ? FROM_ELEMS : 1;

    if (USE_STACK) {
        uint32_t buffer[STACK_ELEMS];
        generator.generate(buffer, buffer+FROM_ELEMS);
        uneven_copy(buffer, dest, dest+size);
    } else {
        std::unique_ptr<uint32_t[]> storage(new uint32_t[FROM_ELEMS]);
        uint32_t* buffer = storage.get();
        generator.generate(buffer, buffer+FROM_ELEMS);
        uneven_copy(buffer, dest, dest+size);
    }
}
| |
| template <size_t size, typename SeedSeq, typename DestIter> |
| inline void generate_to(SeedSeq&& generator, DestIter dest) |
| { |
| typedef typename std::iterator_traits<DestIter>::value_type dest_t; |
| constexpr bool IS_32BIT = sizeof(dest_t) == sizeof(uint32_t); |
| |
| generate_to_impl<size>(std::forward<SeedSeq>(generator), dest, |
| std::integral_constant<bool, IS_32BIT>{}); |
| } |
| |
| /* generate_one, produce a value of integral type using a SeedSeq |
| * (optionally, we can have it produce more than one and pick which one |
| * we want) |
| */ |
| |
| template <typename UInt, size_t i = 0UL, size_t N = i+1UL, typename SeedSeq> |
| inline UInt generate_one(SeedSeq&& generator) |
| { |
| UInt result[N]; |
| generate_to<N>(std::forward<SeedSeq>(generator), result); |
| return result[i]; |
| } |
| |
/*
 * Returns a number in [0, upper_bound) drawn from `rng`.
 *
 * Raw outputs (shifted so the smallest is 0) that fall below a threshold
 * are discarded and redrawn; the first output at or above the threshold is
 * reduced modulo upper_bound.  upper_bound must be nonzero, since it is
 * used as a divisor.
 */
template <typename RngType>
auto bounded_rand(RngType& rng, typename RngType::result_type upper_bound)
        -> typename RngType::result_type
{
    using value_t = typename RngType::result_type;

    const value_t reject_below =
        (RngType::max() - RngType::min() + value_t(1) - upper_bound)
            % upper_bound;

    while (true) {
        const value_t draw = rng() - RngType::min();
        if (draw < reject_below)
            continue;                   // rejected, draw again
        return draw % upper_bound;
    }
}
| |
/*
 * Randomly permutes the elements of [from, to) in place, using
 * bounded_rand on `rng` to pick positions.
 *
 * Working from the back, each step picks an index among the elements not
 * yet placed and swaps that element into the last unplaced slot.
 */
template <typename Iter, typename RandType>
void shuffle(Iter from, Iter to, RandType&& rng)
{
    typedef typename std::iterator_traits<Iter>::difference_type delta_t;
    using std::swap;    // permit ADL to find a type-specific swap

    for (auto remaining = to - from; remaining > 1; --remaining) {
        delta_t chosen(bounded_rand(rng, remaining));
        --to;
        swap(*(from+chosen), *to);
    }
}
| |
/*
 * Although std::seed_seq is useful, it isn't everything.  Often we want to
 * initialize a random-number generator some other way, such as from a random
 * device.
 *
 * Technically, seed_seq_from does not meet the requirements of a
 * SeedSequence because it lacks some of the rarely-used member functions
 * (some of which would be impossible to provide).  However the C++ standard
 * is quite specific that actual engines only call the generate method, so it
 * ought not to be a problem in practice.
 */
| |
/*
 * Adapts a random-number source of type RngType so it can be passed where a
 * seed sequence is expected: generate() fills a range with values drawn
 * from the wrapped source.
 *
 * result_type is public, as the SeedSequence requirements ask for, so that
 * library code which inspects `S::result_type` can accept this class.
 */
template <typename RngType>
class seed_seq_from {
public:
    // Type of the values written by generate().
    typedef uint_least32_t result_type;

private:
    RngType rng_;

public:
    // Forwards all arguments to the constructor of the wrapped RngType.
    template<typename... Args>
    seed_seq_from(Args&&... args) :
        rng_(std::forward<Args>(args)...)
    {
        // Nothing (else) to do...
    }

    // Fills [start, finish) with successive outputs of the wrapped source,
    // each converted to result_type.
    template<typename Iter>
    void generate(Iter start, Iter finish)
    {
        for (auto i = start; i != finish; ++i)
            *i = result_type(rng_());
    }

    // Returns RngType::max() as a size_t, or the largest size_t when the
    // source's result type is wider than result_type and max() does not fit.
    constexpr size_t size() const
    {
        return (sizeof(typename RngType::result_type) > sizeof(result_type)
                && RngType::max() > ~size_t(0UL))
             ? ~size_t(0UL)
             : size_t(RngType::max());
    }
};
| |
| /* |
| * Sometimes you might want a distinct seed based on when the program |
| * was compiled. That way, a particular instance of the program will |
| * behave the same way, but when recompiled it'll produce a different |
| * value. |
| */ |
| |
/* Disable the struct because the non-reproducibility makes bugs
 * harder to diagnose.
 * warning: expansion of date or time macro is not reproducible
| template <typename IntType> |
| struct static_arbitrary_seed { |
| private: |
| static constexpr IntType fnv(IntType hash, const char* pos) { |
| return *pos == '\0' |
| ? hash |
| : fnv((hash * IntType(16777619U)) ^ *pos, (pos+1)); |
| } |
| |
| public: |
| static constexpr IntType value = fnv(IntType(2166136261U ^ sizeof(IntType)), |
| __DATE__ __TIME__ __FILE__); |
| }; |
| */ |
| |
// Sometimes, when debugging or testing, it's handy to be able to print the
// name of a type (in human-readable form).  This code allows the idiom:
//
//      cout << printable_typename<my_foo_type_t>()
//
// to print out my_foo_type_t (or its concrete type if it is a synonym)
| |
// Empty tag type; streaming a printable_typename<T> prints the name of T.
template <typename T>
struct printable_typename {};

/*
 * Writes the name of T to `out`.
 *
 * With GNU-compatible compilers the implementation's type name is passed
 * through abi::__cxa_demangle and the demangled form is printed when that
 * succeeds.  Otherwise, and on other compilers, the string returned by
 * typeid(T).name() is printed unchanged.
 */
template <typename T>
std::ostream& operator<<(std::ostream& out, printable_typename<T>) {
    const char* const raw_name = typeid(T).name();
#ifdef __GNUC__
    int status;
    const char* const demangled =
        abi::__cxa_demangle(raw_name, nullptr, nullptr, &status);
    const bool demangle_ok = (status == 0);
    if (demangle_ok)
        out << demangled;
    // The demangler's buffer is released with free(); on failure the
    // pointer is null, which free() accepts.
    free((void*) demangled);
    if (demangle_ok)
        return out;
#endif
    return out << raw_name;
}
| |
| } // namespace pcg_extras |
| |
| #endif // PCG_EXTRAS_HPP_INCLUDED |