| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| /*! |
| * \file sampler.h |
| * \brief implementations of random sampling functors. |
| */ |
| |
| #ifndef MXNET_OPERATOR_RANDOM_SAMPLER_H_ |
| #define MXNET_OPERATOR_RANDOM_SAMPLER_H_ |
| |
| #include <algorithm> |
| |
| using namespace mshadow; |
| using namespace mxnet::op::mxnet_op; |
| using namespace mxnet::common::random; |
| |
| namespace mxnet { |
| namespace op { |
| |
| /*! |
| * \brief Launch a generic kernel with parallel random generator. |
| * \tparam gen random generator |
| * \tparam N Number of iterations |
| * \tparam Args Varargs type to eventually pass to the OP::Map() function |
| */ |
| template<typename OP, typename xpu, typename GType, typename ...Args> |
| inline static void LaunchRNG(mshadow::Stream<xpu> *s, |
| common::random::RandGenerator<xpu, GType> *gen, |
| const index_t N, Args... args) { |
| // minimal check to avoid division by zero, below. |
| // if `N` is zero the map operation is a no-op in any case. |
| if (N <= 0) { |
| return; |
| } |
| const index_t nloop = (N + RandGenerator<xpu>::kMinNumRandomPerThread - 1) / |
| RandGenerator<xpu>::kMinNumRandomPerThread; |
| const index_t nthread = std::min(nloop, |
| static_cast<index_t>(RandGenerator<xpu>::kNumRandomStates)); |
| const index_t step = (N + nthread - 1) / nthread; |
| Kernel<OP, xpu>::Launch(s, nthread, *gen, N, step, args...); |
| } |
| |
| #define RNG_KERNEL_LOOP(xpu, GType, thread_id, gen, N, step, ...) \ |
| const index_t start = thread_id * step; \ |
| const index_t end = start + step; \ |
| typename RandGenerator<xpu, GType>::Impl genImpl(&gen, thread_id); \ |
| for (index_t i = start; i < end && i < N; ++i) { \ |
| {__VA_ARGS__} \ |
| } |
| |
| template<typename xpu> |
| struct SampleUniformKernel { |
| template<typename IType, typename OType> |
| MSHADOW_XINLINE static void Map(index_t id, RandGenerator<xpu, OType> gen, |
| const index_t N, const index_t step, |
| index_t nParm, index_t nSample, |
| const IType *lower, const IType *upper, OType *out) { |
| RNG_KERNEL_LOOP(xpu, OType, id, gen, N, step, { |
| index_t nBatch(1 + (nSample - 1) / nParm); |
| out[i] = OType(lower[i / nBatch] + |
| (upper[i / nBatch] - lower[i / nBatch]) * genImpl.uniform()); |
| }); |
| } |
| }; |
| |
| template<typename xpu> |
| struct UniformSampler { |
| template<typename IType, typename OType> |
| MSHADOW_FORCE_INLINE void Sample(const Tensor<xpu, 1, IType>& lower, |
| const Tensor<xpu, 1, IType>& upper, |
| const Tensor<xpu, 1, OType>& out, |
| RandGenerator<xpu, OType> *pgen, |
| Stream<xpu> *s) { |
| LaunchRNG<SampleUniformKernel<xpu>, xpu>(s, pgen, out.size(0), lower.size(0), out.size(0), |
| lower.dptr_, upper.dptr_, out.dptr_); |
| } |
| }; |
| |
| template<typename xpu> |
| struct SampleRandIntKernel { |
| template<typename IType, typename OType> |
| MSHADOW_XINLINE static void Map(index_t id, RandGenerator<xpu, OType> gen, |
| const index_t N, const index_t step, |
| index_t nParm, index_t nSample, |
| const IType *lower, const IType *upper, OType *out) { |
| RNG_KERNEL_LOOP(xpu, OType, id, gen, N, step, { |
| index_t nBatch(1 + (nSample - 1) / nParm); |
| if (sizeof(IType) == sizeof(int64_t)) |
| out[i] = OType(lower[i / nBatch] + |
| genImpl.rand_int64() % (upper[i / nBatch] - lower[i / nBatch])); |
| else |
| out[i] = OType(lower[i / nBatch] + |
| genImpl.rand() % (upper[i / nBatch] - lower[i / nBatch])); |
| }); |
| } |
| }; |
| |
| template<typename xpu> |
| struct RandIntSampler { |
| template<typename IType, typename OType> |
| MSHADOW_FORCE_INLINE void Sample(const Tensor<xpu, 1, IType>& lower, |
| const Tensor<xpu, 1, IType>& upper, |
| const Tensor<xpu, 1, OType>& out, |
| RandGenerator<xpu, OType> *pgen, |
| Stream<xpu> *s) { |
| LaunchRNG<SampleRandIntKernel<xpu>, xpu>(s, pgen, out.size(0), lower.size(0), out.size(0), |
| lower.dptr_, upper.dptr_, out.dptr_); |
| } |
| }; |
| |
| template<typename xpu> |
| struct SampleNormalKernel { |
| template<typename IType, typename OType> |
| MSHADOW_XINLINE static void Map(index_t id, RandGenerator<xpu, OType> gen, |
| const index_t N, const index_t step, |
| index_t nParm, index_t nSample, |
| const IType *mean, const IType *std, OType *out) { |
| RNG_KERNEL_LOOP(xpu, OType, id, gen, N, step, { |
| index_t nBatch(1 + (nSample - 1) / nParm); |
| out[i] = OType(genImpl.normal() * std[i / nBatch] + mean[i / nBatch]); |
| }); |
| } |
| }; |
| |
| template<typename xpu> |
| struct NormalSampler { |
| template<typename IType, typename OType> |
| MSHADOW_FORCE_INLINE void Sample(const Tensor<xpu, 1, IType>& mean, |
| const Tensor<xpu, 1, IType>& std, |
| const Tensor<xpu, 1, OType>& out, |
| RandGenerator<xpu, OType> *pgen, |
| Stream<xpu> *s) { |
| LaunchRNG<SampleNormalKernel<xpu>, xpu>(s, pgen, out.size(0), mean.size(0), out.size(0), |
| mean.dptr_, std.dptr_, out.dptr_); |
| } |
| }; |
| |
| template<typename xpu> |
| struct SampleExponentialKernel { |
| template<typename IType, typename OType> |
| MSHADOW_XINLINE static void Map(index_t id, RandGenerator<xpu, OType> gen, |
| const index_t N, const index_t step, |
| index_t nParm, index_t nSample, |
| const IType *lambda, OType *out) { |
| RNG_KERNEL_LOOP(xpu, OType, id, gen, N, step, { |
| index_t nBatch(1 + (nSample - 1) / nParm); |
| out[i] = OType(-log(1.0 - genImpl.uniform()) / lambda[i / nBatch]); |
| }); |
| } |
| }; |
| |
| template<typename xpu> |
| struct ExponentialSampler { |
| template<typename IType, typename OType> |
| MSHADOW_FORCE_INLINE void Sample(const Tensor<xpu, 1, IType>& lambda, |
| const Tensor<xpu, 1, OType>& out, |
| RandGenerator<xpu, OType> *pgen, |
| Stream<xpu> *s) { |
| LaunchRNG<SampleExponentialKernel<xpu>, xpu>(s, pgen, out.size(0), |
| lambda.size(0), out.size(0), |
| lambda.dptr_, out.dptr_); |
| } |
| }; |
| |
| template<typename xpu, typename IType, typename OType> |
| MSHADOW_XINLINE OType SampleGamma(IType a, IType b, typename RandGenerator<xpu, OType>::Impl *gen) { |
| // Generate one sample of the gamma distribution |
| OType sample; |
| OType d = a < 1 ? a + 2.0 / 3.0 : a - 1.0 / 3.0; |
| OType k = sqrt(9.0 * d); |
| OType c = 1.0 / k; |
| while (1) { |
| OType Z = gen->normal(); |
| if (Z > -k) { |
| OType x = 1.0 + c * Z; |
| OType V = x * x * x; |
| if (log(1.0-gen->uniform()) < 0.5 * Z * Z + d * (1.0 - V + log(V))) { |
| sample = d * V * b; |
| break; |
| } |
| } |
| } |
| return a < 1 ? sample * pow(gen->uniform(), OType(1.0 / a)) : sample; |
| } |
| |
| template<typename xpu> |
| struct SampleGammaKernel { |
| template<typename IType, typename OType, typename FType> |
| MSHADOW_XINLINE static void Map(index_t id, RandGenerator<xpu, FType> gen, |
| const index_t N, const index_t step, |
| index_t nParm, index_t nSample, |
| const IType *alpha, const IType *beta, OType *out) { |
| RNG_KERNEL_LOOP(xpu, FType, id, gen, N, step, { |
| index_t nBatch(1 + (nSample - 1) / nParm); |
| out[i] = OType(SampleGamma<xpu, IType, FType>(alpha[i / nBatch], |
| beta[i / nBatch], &genImpl)); |
| }); |
| } |
| }; |
| |
| template<typename xpu> |
| struct GammaSampler { |
| template<typename IType, typename OType> |
| MSHADOW_FORCE_INLINE void Sample(const Tensor<xpu, 1, IType>& alpha, |
| const Tensor<xpu, 1, IType>& beta, |
| const Tensor<xpu, 1, OType>& out, |
| RandGenerator<xpu, OType> *pgen, |
| Stream<xpu> *s) { |
| typedef typename std::conditional<std::is_floating_point<OType>::value, |
| OType, float>::type FType; |
| RandGenerator<xpu, FType> *gen = reinterpret_cast<RandGenerator<xpu, FType> *>(pgen); |
| LaunchRNG<SampleGammaKernel<xpu>, xpu>(s, gen, out.size(0), alpha.size(0), out.size(0), |
| alpha.dptr_, beta.dptr_, out.dptr_); |
| } |
| }; |
| |
| template<typename xpu> |
| MSHADOW_XINLINE int SamplePoisson(float lambda, typename RandGenerator<xpu, float>::Impl *gen) { |
| // Generate one sample of the poisson distribution. Intentionally written |
| // towards a specific type (float) for internal computation which is sufficient |
| // for accurate enough computation. |
| if ( lambda < 12.0 ) { |
| float t = expf(-lambda); |
| int x = 0; |
| for ( float prod = gen->uniform(); prod > t; prod *= gen->uniform() ) { x += 1; } |
| return x; |
| } else { |
| // Approximation for high lambda according to: |
| // Numerical Recipes in C: The Art of Scientific Computing |
| // Cambridge University Press |
| const float pi(3.1415926); |
| const float sq(sqrt(2.0*lambda)); |
| const float loglambda(log(lambda)); |
| const float g(lambda*loglambda-lgammaf(lambda+1.0)); |
| float em(0), t(0), y(0); |
| do { |
| do { |
| y = tanf(pi * gen->uniform()); |
| em = sq * y + lambda; |
| } while (em < 0.0); |
| em = floorf(em); |
| t = 0.9 * (1.0 + y * y) * expf(em * loglambda - lgammaf(em + 1.0) - g); |
| } while (gen->uniform() > t); |
| return static_cast<int>(em); |
| } |
| } |
| |
| template<typename xpu> |
| struct SamplePoissonKernel { |
| template<typename IType, typename OType> |
| MSHADOW_XINLINE static void Map(index_t id, RandGenerator<xpu, float> gen, |
| const index_t N, const index_t step, |
| index_t nParm, index_t nSample, |
| const IType *lambda, OType *out) { |
| RNG_KERNEL_LOOP(xpu, float, id, gen, N, step, { |
| index_t nBatch(1 + (nSample - 1) / nParm); |
| out[i] = OType(SamplePoisson<xpu>(lambda[i / nBatch], &genImpl)); |
| }); |
| } |
| }; |
| |
| template<typename xpu> |
| struct PoissonSampler { |
| template<typename IType, typename OType> |
| MSHADOW_FORCE_INLINE void Sample(const Tensor<xpu, 1, IType>& lambda, |
| const Tensor<xpu, 1, OType>& out, |
| RandGenerator<xpu, OType> *pgen, |
| Stream<xpu> *s) { |
| RandGenerator<xpu, float> *gen = reinterpret_cast<RandGenerator<xpu, float> *>(pgen); |
| LaunchRNG<SamplePoissonKernel<xpu>, xpu>(s, gen, out.size(0), lambda.size(0), out.size(0), |
| lambda.dptr_, out.dptr_); |
| } |
| }; |
| |
| template<typename xpu> |
| struct SampleNegativeBinomialKernel { |
| template<typename IType, typename OType> |
| MSHADOW_XINLINE static void Map(index_t id, RandGenerator<xpu, float> gen, |
| const index_t N, const index_t step, |
| index_t nParm, index_t nSample, |
| const IType *k, const IType *p, OType *out) { |
| RNG_KERNEL_LOOP(xpu, float, id, gen, N, step, { |
| index_t nBatch(1 + (nSample - 1) / nParm); |
| float alpha = k[i / nBatch]; |
| float prob = p[i / nBatch]; |
| float beta = (1.0 - prob) / prob; |
| float lambda = SampleGamma<xpu, IType, float>(alpha, beta, &genImpl); |
| out[i] = OType(SamplePoisson<xpu>(lambda, &genImpl)); |
| }); |
| } |
| }; |
| |
| template<typename xpu> |
| struct NegativeBinomialSampler { |
| template<typename IType, typename OType> |
| MSHADOW_FORCE_INLINE void Sample(const Tensor<xpu, 1, IType>& k, |
| const Tensor<xpu, 1, IType>& p, |
| const Tensor<xpu, 1, OType>& out, |
| RandGenerator<xpu, OType> *pgen, |
| Stream<xpu> *s) { |
| RandGenerator<xpu, float> *gen = reinterpret_cast<RandGenerator<xpu, float> *>(pgen); |
| LaunchRNG<SampleNegativeBinomialKernel<xpu>, xpu>(s, gen, out.size(0), k.size(0), out.size(0), |
| k.dptr_, p.dptr_, out.dptr_); |
| } |
| }; |
| |
| template<typename xpu> |
| struct SampleGeneralizedNegativeBinomialKernel { |
| template<typename IType, typename OType> |
| MSHADOW_XINLINE static void Map(index_t id, RandGenerator<xpu, float> gen, |
| const index_t N, const index_t step, |
| index_t nParm, index_t nSample, |
| const IType *mu, const IType *alpha, OType *out) { |
| RNG_KERNEL_LOOP(xpu, float, id, gen, N, step, { |
| index_t nBatch(1 + (nSample - 1) / nParm); |
| float lambda = alpha[i / nBatch] == 0 ? |
| static_cast<float>(mu[i / nBatch]) : |
| SampleGamma<xpu, IType, float>(IType(1) / alpha[i / nBatch], |
| alpha[i / nBatch] * mu[i / nBatch], &genImpl); |
| out[i] = OType(SamplePoisson<xpu>(lambda, &genImpl)); |
| }); |
| } |
| }; |
| |
| template<typename xpu> |
| struct GeneralizedNegativeBinomialSampler { |
| template<typename IType, typename OType> |
| MSHADOW_FORCE_INLINE void Sample(const Tensor<xpu, 1, IType>& mu, |
| const Tensor<xpu, 1, IType>& alpha, |
| const Tensor<xpu, 1, OType>& out, |
| RandGenerator<xpu, OType> *pgen, |
| Stream<xpu> *s) { |
| RandGenerator<xpu, float> *gen = reinterpret_cast<RandGenerator<xpu, float> *>(pgen); |
| LaunchRNG<SampleGeneralizedNegativeBinomialKernel<xpu>, xpu>(s, gen, out.size(0), |
| mu.size(0), out.size(0), |
| mu.dptr_, alpha.dptr_, out.dptr_); |
| } |
| }; |
| |
| } // namespace op |
| } // namespace mxnet |
| |
| #endif // MXNET_OPERATOR_RANDOM_SAMPLER_H_ |