/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*!
* \file sampler.h
* \brief implementations of random sampling functors.
*/
#ifndef MXNET_OPERATOR_RANDOM_SAMPLER_H_
#define MXNET_OPERATOR_RANDOM_SAMPLER_H_

#include <algorithm>
#include <type_traits>

#include "../mxnet_op.h"
#include "../../common/random_generator.h"

using namespace mshadow;
using namespace mxnet::op::mxnet_op;
using namespace mxnet::common::random;

namespace mxnet {
namespace op {

/*!
 * \brief Launch a generic kernel with a parallel random generator.
 * \tparam OP kernel functor whose Map() is invoked once per thread
 * \tparam xpu device type (cpu or gpu)
 * \tparam GType data type of the random generator
 * \tparam Args varargs type to eventually pass to the OP::Map() function
 * \param s device stream
 * \param gen parallel random generator
 * \param N total number of samples to produce
 * \param args additional arguments forwarded to OP::Map()
 */
template<typename OP, typename xpu, typename GType, typename ...Args>
inline static void LaunchRNG(mshadow::Stream<xpu> *s,
                             common::random::RandGenerator<xpu, GType> *gen,
                             const index_t N, Args... args) {
  // Minimal check to avoid division by zero below.
  // If `N` is zero the map operation is a no-op in any case.
  if (N <= 0) {
    return;
  }
  const index_t nloop = (N + RandGenerator<xpu>::kMinNumRandomPerThread - 1) /
                        RandGenerator<xpu>::kMinNumRandomPerThread;
  const index_t nthread = std::min(nloop,
                                   static_cast<index_t>(RandGenerator<xpu>::kNumRandomStates));
  const index_t step = (N + nthread - 1) / nthread;
  Kernel<OP, xpu>::Launch(s, nthread, *gen, N, step, args...);
}

#define RNG_KERNEL_LOOP(xpu, GType, thread_id, gen, N, step, ...)     \
  const index_t start = thread_id * step;                             \
  const index_t end = start + step;                                   \
  typename RandGenerator<xpu, GType>::Impl genImpl(&gen, thread_id);  \
  for (index_t i = start; i < end && i < N; ++i) {                    \
    { __VA_ARGS__ }                                                   \
  }
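
/*!
 * A minimal illustrative sketch (hypothetical, not one of the kernels below) of
 * how LaunchRNG and RNG_KERNEL_LOOP fit together: Map() receives the thread id,
 * the shared generator, the total count N, the per-thread step and any user
 * arguments; the macro derives the index range [id * step, min((id + 1) * step, N))
 * and a thread-local generator `genImpl`.
 *
 *   template<typename xpu>
 *   struct FillUniformKernel {  // hypothetical example kernel
 *     template<typename OType>
 *     MSHADOW_XINLINE static void Map(index_t id, RandGenerator<xpu, OType> gen,
 *                                     const index_t N, const index_t step,
 *                                     OType *out) {
 *       RNG_KERNEL_LOOP(xpu, OType, id, gen, N, step, {
 *         out[i] = genImpl.uniform();  // one independent draw per output element
 *       });
 *     }
 *   };
 *
 *   // Launched as: LaunchRNG<FillUniformKernel<xpu>, xpu>(s, &gen, N, out_ptr);
 */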

template<typename xpu>
struct SampleUniformKernel {
  template<typename IType, typename OType>
  MSHADOW_XINLINE static void Map(index_t id, RandGenerator<xpu, OType> gen,
                                  const index_t N, const index_t step,
                                  index_t nParm, index_t nSample,
                                  const IType *lower, const IType *upper, OType *out) {
    RNG_KERNEL_LOOP(xpu, OType, id, gen, N, step, {
      index_t nBatch(1 + (nSample - 1) / nParm);
      out[i] = OType(lower[i / nBatch] +
                     (upper[i / nBatch] - lower[i / nBatch]) * genImpl.uniform());
    });
  }
};
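
/*
 * Layout convention shared by all sampling kernels in this file: nSample output
 * values are drawn from nParm parameter entries. With nBatch = ceil(nSample / nParm)
 * (computed as 1 + (nSample - 1) / nParm), output element i uses the parameters at
 * index i / nBatch, so each parameter entry fills one contiguous block of the output.
 * For example, with nParm == 2 and nSample == 6, out[0..2] are drawn from
 * (lower[0], upper[0]) and out[3..5] from (lower[1], upper[1]).
 */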

template<typename xpu>
struct UniformSampler {
  template<typename IType, typename OType>
  MSHADOW_FORCE_INLINE void Sample(const Tensor<xpu, 1, IType>& lower,
                                   const Tensor<xpu, 1, IType>& upper,
                                   const Tensor<xpu, 1, OType>& out,
                                   RandGenerator<xpu, OType> *pgen,
                                   Stream<xpu> *s) {
    LaunchRNG<SampleUniformKernel<xpu>, xpu>(s, pgen, out.size(0), lower.size(0), out.size(0),
                                             lower.dptr_, upper.dptr_, out.dptr_);
  }
};

template<typename xpu>
struct SampleRandIntKernel {
  template<typename IType, typename OType>
  MSHADOW_XINLINE static void Map(index_t id, RandGenerator<xpu, OType> gen,
                                  const index_t N, const index_t step,
                                  index_t nParm, index_t nSample,
                                  const IType *lower, const IType *upper, OType *out) {
    RNG_KERNEL_LOOP(xpu, OType, id, gen, N, step, {
      index_t nBatch(1 + (nSample - 1) / nParm);
      if (sizeof(IType) == sizeof(int64_t)) {
        out[i] = OType(lower[i / nBatch] +
                       genImpl.rand_int64() % (upper[i / nBatch] - lower[i / nBatch]));
      } else {
        out[i] = OType(lower[i / nBatch] +
                       genImpl.rand() % (upper[i / nBatch] - lower[i / nBatch]));
      }
    });
  }
};

template<typename xpu>
struct RandIntSampler {
  template<typename IType, typename OType>
  MSHADOW_FORCE_INLINE void Sample(const Tensor<xpu, 1, IType>& lower,
                                   const Tensor<xpu, 1, IType>& upper,
                                   const Tensor<xpu, 1, OType>& out,
                                   RandGenerator<xpu, OType> *pgen,
                                   Stream<xpu> *s) {
    LaunchRNG<SampleRandIntKernel<xpu>, xpu>(s, pgen, out.size(0), lower.size(0), out.size(0),
                                             lower.dptr_, upper.dptr_, out.dptr_);
  }
};

template<typename xpu>
struct SampleNormalKernel {
  template<typename IType, typename OType>
  MSHADOW_XINLINE static void Map(index_t id, RandGenerator<xpu, OType> gen,
                                  const index_t N, const index_t step,
                                  index_t nParm, index_t nSample,
                                  const IType *mean, const IType *std, OType *out) {
    RNG_KERNEL_LOOP(xpu, OType, id, gen, N, step, {
      index_t nBatch(1 + (nSample - 1) / nParm);
      out[i] = OType(genImpl.normal() * std[i / nBatch] + mean[i / nBatch]);
    });
  }
};

template<typename xpu>
struct NormalSampler {
  template<typename IType, typename OType>
  MSHADOW_FORCE_INLINE void Sample(const Tensor<xpu, 1, IType>& mean,
                                   const Tensor<xpu, 1, IType>& std,
                                   const Tensor<xpu, 1, OType>& out,
                                   RandGenerator<xpu, OType> *pgen,
                                   Stream<xpu> *s) {
    LaunchRNG<SampleNormalKernel<xpu>, xpu>(s, pgen, out.size(0), mean.size(0), out.size(0),
                                            mean.dptr_, std.dptr_, out.dptr_);
  }
};

template<typename xpu>
struct SampleExponentialKernel {
  template<typename IType, typename OType>
  MSHADOW_XINLINE static void Map(index_t id, RandGenerator<xpu, OType> gen,
                                  const index_t N, const index_t step,
                                  index_t nParm, index_t nSample,
                                  const IType *lambda, OType *out) {
    RNG_KERNEL_LOOP(xpu, OType, id, gen, N, step, {
      index_t nBatch(1 + (nSample - 1) / nParm);
      // Inverse-CDF sampling: -log(1 - U) / lambda is Exponential(lambda) distributed.
      out[i] = OType(-log(1.0 - genImpl.uniform()) / lambda[i / nBatch]);
    });
  }
};

template<typename xpu>
struct ExponentialSampler {
  template<typename IType, typename OType>
  MSHADOW_FORCE_INLINE void Sample(const Tensor<xpu, 1, IType>& lambda,
                                   const Tensor<xpu, 1, OType>& out,
                                   RandGenerator<xpu, OType> *pgen,
                                   Stream<xpu> *s) {
    LaunchRNG<SampleExponentialKernel<xpu>, xpu>(s, pgen, out.size(0),
                                                 lambda.size(0), out.size(0),
                                                 lambda.dptr_, out.dptr_);
  }
};
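
/*!
 * \brief Generate one sample of the gamma distribution with shape a and scale b
 * using the transformation/rejection method of Marsaglia and Tsang: for a >= 1,
 * set d = a - 1/3 and c = 1 / sqrt(9 * d), draw Z ~ N(0, 1) until V = (1 + c * Z)^3
 * is positive and log(U) < 0.5 * Z^2 + d * (1 - V + log(V)), then return d * V * b;
 * for a < 1, sample from Gamma(a + 1, b) the same way and scale the result
 * by U^(1 / a).
 */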
template<typename xpu, typename IType, typename OType>
MSHADOW_XINLINE OType SampleGamma(IType a, IType b, typename RandGenerator<xpu, OType>::Impl *gen) {
  // Generate one sample of the gamma distribution.
  OType sample;
  OType d = a < 1 ? a + 2.0 / 3.0 : a - 1.0 / 3.0;
  OType k = sqrt(9.0 * d);
  OType c = 1.0 / k;
  while (1) {
    OType Z = gen->normal();
    if (Z > -k) {
      OType x = 1.0 + c * Z;
      OType V = x * x * x;
      if (log(1.0 - gen->uniform()) < 0.5 * Z * Z + d * (1.0 - V + log(V))) {
        sample = d * V * b;
        break;
      }
    }
  }
  return a < 1 ? sample * pow(gen->uniform(), OType(1.0 / a)) : sample;
}

template<typename xpu>
struct SampleGammaKernel {
  template<typename IType, typename OType, typename FType>
  MSHADOW_XINLINE static void Map(index_t id, RandGenerator<xpu, FType> gen,
                                  const index_t N, const index_t step,
                                  index_t nParm, index_t nSample,
                                  const IType *alpha, const IType *beta, OType *out) {
    RNG_KERNEL_LOOP(xpu, FType, id, gen, N, step, {
      index_t nBatch(1 + (nSample - 1) / nParm);
      out[i] = OType(SampleGamma<xpu, IType, FType>(alpha[i / nBatch],
                                                    beta[i / nBatch], &genImpl));
    });
  }
};

template<typename xpu>
struct GammaSampler {
  template<typename IType, typename OType>
  MSHADOW_FORCE_INLINE void Sample(const Tensor<xpu, 1, IType>& alpha,
                                   const Tensor<xpu, 1, IType>& beta,
                                   const Tensor<xpu, 1, OType>& out,
                                   RandGenerator<xpu, OType> *pgen,
                                   Stream<xpu> *s) {
    // Sampling is always done in floating point, even for integral output types.
    typedef typename std::conditional<std::is_floating_point<OType>::value,
                                      OType, float>::type FType;
    RandGenerator<xpu, FType> *gen = reinterpret_cast<RandGenerator<xpu, FType> *>(pgen);
    LaunchRNG<SampleGammaKernel<xpu>, xpu>(s, gen, out.size(0), alpha.size(0), out.size(0),
                                           alpha.dptr_, beta.dptr_, out.dptr_);
  }
};
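
/*!
 * \brief Generate one sample of the Poisson distribution with rate lambda.
 * For lambda < 12, uniform variates are multiplied together until the running
 * product drops below exp(-lambda); the sample is the number of draws minus one.
 * For larger lambda the rejection method from "Numerical Recipes in C" is used.
 */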
template<typename xpu>
MSHADOW_XINLINE int SamplePoisson(float lambda, typename RandGenerator<xpu, float>::Impl *gen) {
  // Generate one sample of the Poisson distribution. Intentionally uses a fixed
  // type (float) for the internal computation, which is sufficiently accurate here.
  if (lambda < 12.0) {
    float t = expf(-lambda);
    int x = 0;
    for (float prod = gen->uniform(); prod > t; prod *= gen->uniform()) { x += 1; }
    return x;
  } else {
    // Approximation for large lambda according to:
    // Numerical Recipes in C: The Art of Scientific Computing,
    // Cambridge University Press.
    const float pi(3.1415926);
    const float sq(sqrt(2.0 * lambda));
    const float loglambda(log(lambda));
    const float g(lambda * loglambda - lgammaf(lambda + 1.0));
    float em(0), t(0), y(0);
    do {
      do {
        y = tanf(pi * gen->uniform());
        em = sq * y + lambda;
      } while (em < 0.0);
      em = floorf(em);
      t = 0.9 * (1.0 + y * y) * expf(em * loglambda - lgammaf(em + 1.0) - g);
    } while (gen->uniform() > t);
    return static_cast<int>(em);
  }
}

template<typename xpu>
struct SamplePoissonKernel {
  template<typename IType, typename OType>
  MSHADOW_XINLINE static void Map(index_t id, RandGenerator<xpu, float> gen,
                                  const index_t N, const index_t step,
                                  index_t nParm, index_t nSample,
                                  const IType *lambda, OType *out) {
    RNG_KERNEL_LOOP(xpu, float, id, gen, N, step, {
      index_t nBatch(1 + (nSample - 1) / nParm);
      out[i] = OType(SamplePoisson<xpu>(lambda[i / nBatch], &genImpl));
    });
  }
};

template<typename xpu>
struct PoissonSampler {
  template<typename IType, typename OType>
  MSHADOW_FORCE_INLINE void Sample(const Tensor<xpu, 1, IType>& lambda,
                                   const Tensor<xpu, 1, OType>& out,
                                   RandGenerator<xpu, OType> *pgen,
                                   Stream<xpu> *s) {
    RandGenerator<xpu, float> *gen = reinterpret_cast<RandGenerator<xpu, float> *>(pgen);
    LaunchRNG<SamplePoissonKernel<xpu>, xpu>(s, gen, out.size(0), lambda.size(0), out.size(0),
                                             lambda.dptr_, out.dptr_);
  }
};
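
/*!
 * \brief Kernel for sampling the negative binomial distribution NB(k, p) via its
 * gamma-Poisson mixture representation: draw lambda ~ Gamma(k, (1 - p) / p) and
 * return a Poisson(lambda) sample.
 */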
template<typename xpu>
struct SampleNegativeBinomialKernel {
  template<typename IType, typename OType>
  MSHADOW_XINLINE static void Map(index_t id, RandGenerator<xpu, float> gen,
                                  const index_t N, const index_t step,
                                  index_t nParm, index_t nSample,
                                  const IType *k, const IType *p, OType *out) {
    RNG_KERNEL_LOOP(xpu, float, id, gen, N, step, {
      index_t nBatch(1 + (nSample - 1) / nParm);
      float alpha = k[i / nBatch];
      float prob = p[i / nBatch];
      float beta = (1.0 - prob) / prob;
      float lambda = SampleGamma<xpu, IType, float>(alpha, beta, &genImpl);
      out[i] = OType(SamplePoisson<xpu>(lambda, &genImpl));
    });
  }
};

template<typename xpu>
struct NegativeBinomialSampler {
  template<typename IType, typename OType>
  MSHADOW_FORCE_INLINE void Sample(const Tensor<xpu, 1, IType>& k,
                                   const Tensor<xpu, 1, IType>& p,
                                   const Tensor<xpu, 1, OType>& out,
                                   RandGenerator<xpu, OType> *pgen,
                                   Stream<xpu> *s) {
    RandGenerator<xpu, float> *gen = reinterpret_cast<RandGenerator<xpu, float> *>(pgen);
    LaunchRNG<SampleNegativeBinomialKernel<xpu>, xpu>(s, gen, out.size(0), k.size(0), out.size(0),
                                                      k.dptr_, p.dptr_, out.dptr_);
  }
};
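
/*!
 * \brief Kernel for sampling the generalized negative binomial distribution with
 * mean mu and dispersion alpha: draw lambda ~ Gamma(1 / alpha, alpha * mu) and
 * return a Poisson(lambda) sample; for alpha == 0 this reduces to sampling
 * Poisson(mu) directly.
 */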
template<typename xpu>
struct SampleGeneralizedNegativeBinomialKernel {
  template<typename IType, typename OType>
  MSHADOW_XINLINE static void Map(index_t id, RandGenerator<xpu, float> gen,
                                  const index_t N, const index_t step,
                                  index_t nParm, index_t nSample,
                                  const IType *mu, const IType *alpha, OType *out) {
    RNG_KERNEL_LOOP(xpu, float, id, gen, N, step, {
      index_t nBatch(1 + (nSample - 1) / nParm);
      float lambda = alpha[i / nBatch] == 0 ?
                     static_cast<float>(mu[i / nBatch]) :
                     SampleGamma<xpu, IType, float>(IType(1) / alpha[i / nBatch],
                                                    alpha[i / nBatch] * mu[i / nBatch], &genImpl);
      out[i] = OType(SamplePoisson<xpu>(lambda, &genImpl));
    });
  }
};

template<typename xpu>
struct GeneralizedNegativeBinomialSampler {
  template<typename IType, typename OType>
  MSHADOW_FORCE_INLINE void Sample(const Tensor<xpu, 1, IType>& mu,
                                   const Tensor<xpu, 1, IType>& alpha,
                                   const Tensor<xpu, 1, OType>& out,
                                   RandGenerator<xpu, OType> *pgen,
                                   Stream<xpu> *s) {
    RandGenerator<xpu, float> *gen = reinterpret_cast<RandGenerator<xpu, float> *>(pgen);
    LaunchRNG<SampleGeneralizedNegativeBinomialKernel<xpu>, xpu>(s, gen, out.size(0),
                                                                 mu.size(0), out.size(0),
                                                                 mu.dptr_, alpha.dptr_, out.dptr_);
  }
};

} // namespace op
} // namespace mxnet
#endif // MXNET_OPERATOR_RANDOM_SAMPLER_H_