blob: badd6a762972e78a0a4e859781e8580e79a05c0c [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.rng.sampling.distribution;
import org.apache.commons.rng.UniformRandomProvider;
/**
* Sampler for a discrete distribution using an optimised look-up table.
*
* <ul>
* <li>
* The method requires 30-bit integer probabilities that sum to 2<sup>30</sup> as described
* in George Marsaglia, Wai Wan Tsang, Jingbo Wang (2004) Fast Generation of Discrete
* Random Variables. Journal of Statistical Software. Vol. 11, Issue. 3, pp. 1-11.
* </li>
* </ul>
*
* <p>Sampling uses 1 call to {@link UniformRandomProvider#nextInt()}.</p>
*
* <p>Memory requirements depend on the maximum number of possible sample values, {@code n},
* and the values for the probabilities. Storage is optimised for {@code n}. The worst case
* scenario is a uniform distribution of the maximum sample size. This is capped at 0.06MB for
* {@code n <= } 2<sup>8</sup>, 17.0MB for {@code n <= } 2<sup>16</sup>, and 4.3GB for
* {@code n <=} 2<sup>30</sup>. Realistic requirements will be in the kB range.</p>
*
* <p>The sampler supports the following distributions:</p>
*
* <ul>
* <li>Enumerated distribution (probabilities must be provided for each sample)
* <li>Poisson distribution up to {@code mean = 1024}
* <li>Binomial distribution up to {@code trials = 65535}
* </ul>
*
* @see <a href="http://dx.doi.org/10.18637/jss.v011.i03">Margsglia, et al (2004) JSS Vol.
* 11, Issue 3</a>
* @since 1.3
*/
public final class MarsagliaTsangWangDiscreteSampler {
/** The value 2<sup>8</sup> as an {@code int}. */
private static final int INT_8 = 1 << 8;
/** The value 2<sup>16</sup> as an {@code int}. */
private static final int INT_16 = 1 << 16;
/** The value 2<sup>30</sup> as an {@code int}. */
private static final int INT_30 = 1 << 30;
/** The value 2<sup>31</sup> as a {@code double}. */
private static final double DOUBLE_31 = 1L << 31;
// =========================================================================
// Implementation note:
//
// This sampler uses prepared look-up tables that are searched using a single
// random int variate. The look-up tables contain the sample value. The tables
// are constructed using probabilities that sum to 2^30. The original paper
// by Marsaglia, et al (2004) describes the use of 5, 3, or 2 look-up tables
// indexed using digits of base 2^6, 2^10 or 2^15. Currently only base 64 (2^6)
// is supported using 5 look-up tables.
//
// The implementations use 8, 16 or 32 bit storage tables to support different
// distribution sizes with optimal storage. Separate class implementations of
// the same algorithm allow array storage to be accessed directly from 1D tables.
// This provides a performance gain over using: abstracted storage accessed via
// an interface; or a single 2D table.
//
// To allow the optimal implementation to be chosen the sampler is created
// using factory methods. The sampler supports any probability distribution
// when provided via an array of probabilities and the Poisson and Binomial
// distributions for a restricted set of parameters. The restrictions are
// imposed by the requirement to compute the entire probability distribution
// from the controlling parameter(s) using a recursive method. Factory
// constructors return a SharedStateDiscreteSampler instance. Each distribution
// type is contained in an inner class.
// =========================================================================
/**
* The base class for Marsaglia-Tsang-Wang samplers.
*/
private abstract static class AbstractMarsagliaTsangWangDiscreteSampler
implements SharedStateDiscreteSampler {
/** Underlying source of randomness. */
protected final UniformRandomProvider rng;
/** The name of the distribution. */
private final String distributionName;
/**
* @param rng Generator of uniformly distributed random numbers.
* @param distributionName Distribution name.
*/
AbstractMarsagliaTsangWangDiscreteSampler(UniformRandomProvider rng,
String distributionName) {
this.rng = rng;
this.distributionName = distributionName;
}
/**
* @param rng Generator of uniformly distributed random numbers.
* @param source Source to copy.
*/
AbstractMarsagliaTsangWangDiscreteSampler(UniformRandomProvider rng,
AbstractMarsagliaTsangWangDiscreteSampler source) {
this.rng = rng;
this.distributionName = source.distributionName;
}
/** {@inheritDoc} */
@Override
public String toString() {
return "Marsaglia Tsang Wang " + distributionName + " deviate [" + rng.toString() + "]";
}
}
/**
* An implementation for the sample algorithm based on the decomposition of the
* index in the range {@code [0,2^30)} into 5 base-64 digits with 8-bit backing storage.
*/
private static class MarsagliaTsangWangBase64Int8DiscreteSampler
extends AbstractMarsagliaTsangWangDiscreteSampler {
/** The mask to convert a {@code byte} to an unsigned 8-bit integer. */
private static final int MASK = 0xff;
/** Limit for look-up table 1. */
private final int t1;
/** Limit for look-up table 2. */
private final int t2;
/** Limit for look-up table 3. */
private final int t3;
/** Limit for look-up table 4. */
private final int t4;
/** Look-up table table1. */
private final byte[] table1;
/** Look-up table table2. */
private final byte[] table2;
/** Look-up table table3. */
private final byte[] table3;
/** Look-up table table4. */
private final byte[] table4;
/** Look-up table table5. */
private final byte[] table5;
/**
* @param rng Generator of uniformly distributed random numbers.
* @param distributionName Distribution name.
* @param prob The probabilities.
* @param offset The offset (must be positive).
*/
MarsagliaTsangWangBase64Int8DiscreteSampler(UniformRandomProvider rng,
String distributionName,
int[] prob,
int offset) {
super(rng, distributionName);
// Get table sizes for each base-64 digit
int n1 = 0;
int n2 = 0;
int n3 = 0;
int n4 = 0;
int n5 = 0;
for (final int m : prob) {
n1 += getBase64Digit(m, 1);
n2 += getBase64Digit(m, 2);
n3 += getBase64Digit(m, 3);
n4 += getBase64Digit(m, 4);
n5 += getBase64Digit(m, 5);
}
table1 = new byte[n1];
table2 = new byte[n2];
table3 = new byte[n3];
table4 = new byte[n4];
table5 = new byte[n5];
// Compute offsets
t1 = n1 << 24;
t2 = t1 + (n2 << 18);
t3 = t2 + (n3 << 12);
t4 = t3 + (n4 << 6);
n1 = n2 = n3 = n4 = n5 = 0;
// Fill tables
for (int i = 0; i < prob.length; i++) {
final int m = prob[i];
// Primitive type conversion will extract lower 8 bits
final byte k = (byte) (i + offset);
n1 = fill(table1, n1, n1 + getBase64Digit(m, 1), k);
n2 = fill(table2, n2, n2 + getBase64Digit(m, 2), k);
n3 = fill(table3, n3, n3 + getBase64Digit(m, 3), k);
n4 = fill(table4, n4, n4 + getBase64Digit(m, 4), k);
n5 = fill(table5, n5, n5 + getBase64Digit(m, 5), k);
}
}
/**
* @param rng Generator of uniformly distributed random numbers.
* @param source Source to copy.
*/
private MarsagliaTsangWangBase64Int8DiscreteSampler(UniformRandomProvider rng,
MarsagliaTsangWangBase64Int8DiscreteSampler source) {
super(rng, source);
t1 = source.t1;
t2 = source.t2;
t3 = source.t3;
t4 = source.t4;
table1 = source.table1;
table2 = source.table2;
table3 = source.table3;
table4 = source.table4;
table5 = source.table5;
}
/**
* Fill the table with the value.
*
* @param table Table.
* @param from Lower bound index (inclusive)
* @param to Upper bound index (exclusive)
* @param value Value.
* @return the upper bound index
*/
private static int fill(byte[] table, int from, int to, byte value) {
for (int i = from; i < to; i++) {
table[i] = value;
}
return to;
}
@Override
public int sample() {
final int j = rng.nextInt() >>> 2;
if (j < t1) {
return table1[j >>> 24] & MASK;
}
if (j < t2) {
return table2[(j - t1) >>> 18] & MASK;
}
if (j < t3) {
return table3[(j - t2) >>> 12] & MASK;
}
if (j < t4) {
return table4[(j - t3) >>> 6] & MASK;
}
// Note the tables are filled on the assumption that the sum of the probabilities.
// is >=2^30. If this is not true then the final table table5 will be smaller by the
// difference. So the tables *must* be constructed correctly.
return table5[j - t4] & MASK;
}
@Override
public SharedStateDiscreteSampler withUniformRandomProvider(UniformRandomProvider rng) {
return new MarsagliaTsangWangBase64Int8DiscreteSampler(rng, this);
}
}
/**
* An implementation for the sample algorithm based on the decomposition of the
* index in the range {@code [0,2^30)} into 5 base-64 digits with 16-bit backing storage.
*/
private static class MarsagliaTsangWangBase64Int16DiscreteSampler
extends AbstractMarsagliaTsangWangDiscreteSampler {
/** The mask to convert a {@code byte} to an unsigned 16-bit integer. */
private static final int MASK = 0xffff;
/** Limit for look-up table 1. */
private final int t1;
/** Limit for look-up table 2. */
private final int t2;
/** Limit for look-up table 3. */
private final int t3;
/** Limit for look-up table 4. */
private final int t4;
/** Look-up table table1. */
private final short[] table1;
/** Look-up table table2. */
private final short[] table2;
/** Look-up table table3. */
private final short[] table3;
/** Look-up table table4. */
private final short[] table4;
/** Look-up table table5. */
private final short[] table5;
/**
* @param rng Generator of uniformly distributed random numbers.
* @param distributionName Distribution name.
* @param prob The probabilities.
* @param offset The offset (must be positive).
*/
MarsagliaTsangWangBase64Int16DiscreteSampler(UniformRandomProvider rng,
String distributionName,
int[] prob,
int offset) {
super(rng, distributionName);
// Get table sizes for each base-64 digit
int n1 = 0;
int n2 = 0;
int n3 = 0;
int n4 = 0;
int n5 = 0;
for (final int m : prob) {
n1 += getBase64Digit(m, 1);
n2 += getBase64Digit(m, 2);
n3 += getBase64Digit(m, 3);
n4 += getBase64Digit(m, 4);
n5 += getBase64Digit(m, 5);
}
table1 = new short[n1];
table2 = new short[n2];
table3 = new short[n3];
table4 = new short[n4];
table5 = new short[n5];
// Compute offsets
t1 = n1 << 24;
t2 = t1 + (n2 << 18);
t3 = t2 + (n3 << 12);
t4 = t3 + (n4 << 6);
n1 = n2 = n3 = n4 = n5 = 0;
// Fill tables
for (int i = 0; i < prob.length; i++) {
final int m = prob[i];
// Primitive type conversion will extract lower 16 bits
final short k = (short) (i + offset);
n1 = fill(table1, n1, n1 + getBase64Digit(m, 1), k);
n2 = fill(table2, n2, n2 + getBase64Digit(m, 2), k);
n3 = fill(table3, n3, n3 + getBase64Digit(m, 3), k);
n4 = fill(table4, n4, n4 + getBase64Digit(m, 4), k);
n5 = fill(table5, n5, n5 + getBase64Digit(m, 5), k);
}
}
/**
* @param rng Generator of uniformly distributed random numbers.
* @param source Source to copy.
*/
private MarsagliaTsangWangBase64Int16DiscreteSampler(UniformRandomProvider rng,
MarsagliaTsangWangBase64Int16DiscreteSampler source) {
super(rng, source);
t1 = source.t1;
t2 = source.t2;
t3 = source.t3;
t4 = source.t4;
table1 = source.table1;
table2 = source.table2;
table3 = source.table3;
table4 = source.table4;
table5 = source.table5;
}
/**
* Fill the table with the value.
*
* @param table Table.
* @param from Lower bound index (inclusive)
* @param to Upper bound index (exclusive)
* @param value Value.
* @return the upper bound index
*/
private static int fill(short[] table, int from, int to, short value) {
for (int i = from; i < to; i++) {
table[i] = value;
}
return to;
}
@Override
public int sample() {
final int j = rng.nextInt() >>> 2;
if (j < t1) {
return table1[j >>> 24] & MASK;
}
if (j < t2) {
return table2[(j - t1) >>> 18] & MASK;
}
if (j < t3) {
return table3[(j - t2) >>> 12] & MASK;
}
if (j < t4) {
return table4[(j - t3) >>> 6] & MASK;
}
// Note the tables are filled on the assumption that the sum of the probabilities.
// is >=2^30. If this is not true then the final table table5 will be smaller by the
// difference. So the tables *must* be constructed correctly.
return table5[j - t4] & MASK;
}
@Override
public SharedStateDiscreteSampler withUniformRandomProvider(UniformRandomProvider rng) {
return new MarsagliaTsangWangBase64Int16DiscreteSampler(rng, this);
}
}
/**
* An implementation for the sample algorithm based on the decomposition of the
* index in the range {@code [0,2^30)} into 5 base-64 digits with 32-bit backing storage.
*/
private static class MarsagliaTsangWangBase64Int32DiscreteSampler
extends AbstractMarsagliaTsangWangDiscreteSampler {
/** Limit for look-up table 1. */
private final int t1;
/** Limit for look-up table 2. */
private final int t2;
/** Limit for look-up table 3. */
private final int t3;
/** Limit for look-up table 4. */
private final int t4;
/** Look-up table table1. */
private final int[] table1;
/** Look-up table table2. */
private final int[] table2;
/** Look-up table table3. */
private final int[] table3;
/** Look-up table table4. */
private final int[] table4;
/** Look-up table table5. */
private final int[] table5;
/**
* @param rng Generator of uniformly distributed random numbers.
* @param distributionName Distribution name.
* @param prob The probabilities.
* @param offset The offset (must be positive).
*/
MarsagliaTsangWangBase64Int32DiscreteSampler(UniformRandomProvider rng,
String distributionName,
int[] prob,
int offset) {
super(rng, distributionName);
// Get table sizes for each base-64 digit
int n1 = 0;
int n2 = 0;
int n3 = 0;
int n4 = 0;
int n5 = 0;
for (final int m : prob) {
n1 += getBase64Digit(m, 1);
n2 += getBase64Digit(m, 2);
n3 += getBase64Digit(m, 3);
n4 += getBase64Digit(m, 4);
n5 += getBase64Digit(m, 5);
}
table1 = new int[n1];
table2 = new int[n2];
table3 = new int[n3];
table4 = new int[n4];
table5 = new int[n5];
// Compute offsets
t1 = n1 << 24;
t2 = t1 + (n2 << 18);
t3 = t2 + (n3 << 12);
t4 = t3 + (n4 << 6);
n1 = n2 = n3 = n4 = n5 = 0;
// Fill tables
for (int i = 0; i < prob.length; i++) {
final int m = prob[i];
final int k = i + offset;
n1 = fill(table1, n1, n1 + getBase64Digit(m, 1), k);
n2 = fill(table2, n2, n2 + getBase64Digit(m, 2), k);
n3 = fill(table3, n3, n3 + getBase64Digit(m, 3), k);
n4 = fill(table4, n4, n4 + getBase64Digit(m, 4), k);
n5 = fill(table5, n5, n5 + getBase64Digit(m, 5), k);
}
}
/**
* @param rng Generator of uniformly distributed random numbers.
* @param source Source to copy.
*/
private MarsagliaTsangWangBase64Int32DiscreteSampler(UniformRandomProvider rng,
MarsagliaTsangWangBase64Int32DiscreteSampler source) {
super(rng, source);
t1 = source.t1;
t2 = source.t2;
t3 = source.t3;
t4 = source.t4;
table1 = source.table1;
table2 = source.table2;
table3 = source.table3;
table4 = source.table4;
table5 = source.table5;
}
/**
* Fill the table with the value.
*
* @param table Table.
* @param from Lower bound index (inclusive)
* @param to Upper bound index (exclusive)
* @param value Value.
* @return the upper bound index
*/
private static int fill(int[] table, int from, int to, int value) {
for (int i = from; i < to; i++) {
table[i] = value;
}
return to;
}
@Override
public int sample() {
final int j = rng.nextInt() >>> 2;
if (j < t1) {
return table1[j >>> 24];
}
if (j < t2) {
return table2[(j - t1) >>> 18];
}
if (j < t3) {
return table3[(j - t2) >>> 12];
}
if (j < t4) {
return table4[(j - t3) >>> 6];
}
// Note the tables are filled on the assumption that the sum of the probabilities.
// is >=2^30. If this is not true then the final table table5 will be smaller by the
// difference. So the tables *must* be constructed correctly.
return table5[j - t4];
}
@Override
public SharedStateDiscreteSampler withUniformRandomProvider(UniformRandomProvider rng) {
return new MarsagliaTsangWangBase64Int32DiscreteSampler(rng, this);
}
}
/** Class contains only static methods. */
private MarsagliaTsangWangDiscreteSampler() {}
/**
* Gets the k<sup>th</sup> base 64 digit of {@code m}.
*
* @param m the value m.
* @param k the digit.
* @return the base 64 digit
*/
private static int getBase64Digit(int m, int k) {
return (m >>> (30 - 6 * k)) & 63;
}
/**
* Convert the probability to an integer in the range [0,2^30]. This is the numerator of
* a fraction with assumed denominator 2<sup>30</sup>.
*
* @param p Probability.
* @return the fraction numerator
*/
private static int toUnsignedInt30(double p) {
return (int) (p * INT_30 + 0.5);
}
/**
* Create a new instance for probabilities {@code p(i)} where the sample value {@code x} is
* {@code i + offset}.
*
* <p>The sum of the probabilities must be >= 2<sup>30</sup>. Only the
* values for cumulative probability up to 2<sup>30</sup> will be sampled.</p>
*
* @param rng Generator of uniformly distributed random numbers.
* @param distributionName Distribution name.
* @param prob The probabilities.
* @param offset The offset (must be positive).
* @return Sampler.
*/
private static SharedStateDiscreteSampler createSampler(UniformRandomProvider rng,
String distributionName,
int[] prob,
int offset) {
// Note: No argument checks for private method.
// Choose implementation based on the maximum index
final int maxIndex = prob.length + offset - 1;
if (maxIndex < INT_8) {
return new MarsagliaTsangWangBase64Int8DiscreteSampler(rng, distributionName, prob, offset);
}
if (maxIndex < INT_16) {
return new MarsagliaTsangWangBase64Int16DiscreteSampler(rng, distributionName, prob, offset);
}
return new MarsagliaTsangWangBase64Int32DiscreteSampler(rng, distributionName, prob, offset);
}
// =========================================================================
// The following public classes provide factory methods to construct a sampler for:
// - Enumerated probability distribution (from provided double[] probabilities)
// - Poisson distribution for mean <= 1024
// - Binomial distribution for trials <= 65535
// =========================================================================
/**
* Create a sampler for an enumerated distribution of {@code n} values each with an
* associated probability.
* The samples corresponding to each probability are assumed to be a natural sequence
* starting at zero.
*/
public static final class Enumerated {
/** The name of the enumerated probability distribution. */
private static final String ENUMERATED_NAME = "Enumerated";
/** Class contains only static methods. */
private Enumerated() {}
/**
* Creates a sampler for an enumerated distribution of {@code n} values each with an
* associated probability.
*
* <p>The probabilities will be normalised using their sum. The only requirement
* is the sum is positive.</p>
*
* <p>The sum of the probabilities is normalised to 2<sup>30</sup>. Note that
* probabilities are adjusted to the nearest 1<sup>-30</sup> due to round-off during
* the normalisation conversion. Consequently any probability less than 2<sup>-31</sup>
* will not be observed in samples.</p>
*
* @param rng Generator of uniformly distributed random numbers.
* @param probabilities The list of probabilities.
* @return Sampler.
* @throws IllegalArgumentException if {@code probabilities} is null or empty, a
* probability is negative, infinite or {@code NaN}, or the sum of all
* probabilities is not strictly positive.
*/
public static SharedStateDiscreteSampler of(UniformRandomProvider rng,
double[] probabilities) {
return createSampler(rng, ENUMERATED_NAME, normaliseProbabilities(probabilities), 0);
}
/**
* Normalise the probabilities to integers that sum to 2<sup>30</sup>.
*
* @param probabilities The list of probabilities.
* @return the normalised probabilities.
* @throws IllegalArgumentException if {@code probabilities} is null or empty, a
* probability is negative, infinite or {@code NaN}, or the sum of all
* probabilities is not strictly positive.
*/
private static int[] normaliseProbabilities(double[] probabilities) {
final double sumProb = InternalUtils.validateProbabilities(probabilities);
// Compute the normalisation: 2^30 / sum
final double normalisation = INT_30 / sumProb;
final int[] prob = new int[probabilities.length];
int sum = 0;
int max = 0;
int mode = 0;
for (int i = 0; i < prob.length; i++) {
// Add 0.5 for rounding
final int p = (int) (probabilities[i] * normalisation + 0.5);
sum += p;
// Find the mode (maximum probability)
if (max < p) {
max = p;
mode = i;
}
prob[i] = p;
}
// The sum must be >= 2^30.
// Here just compensate the difference onto the highest probability.
prob[mode] += INT_30 - sum;
return prob;
}
}
/**
* Create a sampler for the Poisson distribution.
*/
public static final class Poisson {
/** The name of the Poisson distribution. */
private static final String POISSON_NAME = "Poisson";
/**
* Upper bound on the mean for the Poisson distribution.
*
* <p>The original source code provided in Marsaglia, et al (2004) has no explicit
* limit but the code fails at mean >= 1941 as the transform to compute p(x=mode)
* produces infinity. Use a conservative limit of 1024.</p>
*/
private static final double MAX_MEAN = 1024;
/**
* The threshold for the mean of the Poisson distribution to switch the method used
* to compute the probabilities. This is taken from the example software provided by
* Marsaglia, et al (2004).
*/
private static final double MEAN_THRESHOLD = 21.4;
/** Class contains only static methods. */
private Poisson() {}
/**
* Creates a sampler for the Poisson distribution.
*
* <p>Any probability less than 2<sup>-31</sup> will not be observed in samples.</p>
*
* <p>Storage requirements depend on the tabulated probability values. Example storage
* requirements are listed below.</p>
*
* <pre>
* mean table size kB
* 0.25 882 0.88
* 0.5 1135 1.14
* 1 1200 1.20
* 2 1451 1.45
* 4 1955 1.96
* 8 2961 2.96
* 16 4410 4.41
* 32 6115 6.11
* 64 8499 8.50
* 128 11528 11.53
* 256 15935 31.87
* 512 20912 41.82
* 1024 30614 61.23
* </pre>
*
* <p>Note: Storage changes to 2 bytes per index between {@code mean=128} and {@code mean=256}.</p>
*
* @param rng Generator of uniformly distributed random numbers.
* @param mean Mean.
* @return Sampler.
* @throws IllegalArgumentException if {@code mean <= 0} or {@code mean > 1024}.
*/
public static SharedStateDiscreteSampler of(UniformRandomProvider rng,
double mean) {
validatePoissonDistributionParameters(mean);
// Create the distribution either from X=0 or from X=mode when the mean is high.
return mean < MEAN_THRESHOLD ?
createPoissonDistributionFromX0(rng, mean) :
createPoissonDistributionFromXMode(rng, mean);
}
/**
* Validate the Poisson distribution parameters.
*
* @param mean Mean.
* @throws IllegalArgumentException if {@code mean <= 0} or {@code mean > 1024}.
*/
private static void validatePoissonDistributionParameters(double mean) {
if (mean <= 0) {
throw new IllegalArgumentException("mean is not strictly positive: " + mean);
}
if (mean > MAX_MEAN) {
throw new IllegalArgumentException("mean " + mean + " > " + MAX_MEAN);
}
}
/**
* Creates the Poisson distribution by computing probabilities recursively from {@code X=0}.
*
* @param rng Generator of uniformly distributed random numbers.
* @param mean Mean.
* @return Sampler.
*/
private static SharedStateDiscreteSampler createPoissonDistributionFromX0(
UniformRandomProvider rng, double mean) {
final double p0 = Math.exp(-mean);
// Recursive update of Poisson probability until the value is too small
// p(x + 1) = p(x) * mean / (x + 1)
double p = p0;
int i = 1;
while (p * DOUBLE_31 >= 1) {
p *= mean / i++;
}
// Probabilities are 30-bit integers, assumed denominator 2^30
final int size = i - 1;
final int[] prob = new int[size];
p = p0;
prob[0] = toUnsignedInt30(p);
// The sum must exceed 2^30. In edges cases this is false due to round-off.
int sum = prob[0];
for (i = 1; i < prob.length; i++) {
p *= mean / i;
prob[i] = toUnsignedInt30(p);
sum += prob[i];
}
// If the sum is < 2^30 add the remaining sum to the mode (floor(mean)).
prob[(int) mean] += Math.max(0, INT_30 - sum);
// Note: offset = 0
return createSampler(rng, POISSON_NAME, prob, 0);
}
/**
* Creates the Poisson distribution by computing probabilities recursively upward and downward
* from {@code X=mode}, the location of the largest p-value.
*
* @param rng Generator of uniformly distributed random numbers.
* @param mean Mean.
* @return Sampler.
*/
private static SharedStateDiscreteSampler createPoissonDistributionFromXMode(
UniformRandomProvider rng, double mean) {
// If mean >= 21.4, generate from largest p-value up, then largest down.
// The largest p-value will be at the mode (floor(mean)).
// Find p(x=mode)
final int mode = (int) mean;
// This transform is stable until mean >= 1941 where p will result in Infinity
// before the divisor i is large enough to start reducing the product (i.e. i > c).
final double c = mean * Math.exp(-mean / mode);
double p = 1.0;
for (int i = 1; i <= mode; i++) {
p *= c / i;
}
final double pMode = p;
// Find the upper limit using recursive computation of the p-value.
// Note this will exit when i overflows to negative so no check on the range
int i = mode + 1;
while (p * DOUBLE_31 >= 1) {
p *= mean / i++;
}
final int last = i - 2;
// Find the lower limit using recursive computation of the p-value.
p = pMode;
int j = -1;
for (i = mode - 1; i >= 0; i--) {
p *= (i + 1) / mean;
if (p * DOUBLE_31 < 1) {
j = i;
break;
}
}
// Probabilities are 30-bit integers, assumed denominator 2^30.
// This is the minimum sample value: prob[x - offset] = p(x)
final int offset = j + 1;
final int size = last - offset + 1;
final int[] prob = new int[size];
p = pMode;
prob[mode - offset] = toUnsignedInt30(p);
// The sum must exceed 2^30. In edges cases this is false due to round-off.
int sum = prob[mode - offset];
// From mode to upper limit
for (i = mode + 1; i <= last; i++) {
p *= mean / i;
prob[i - offset] = toUnsignedInt30(p);
sum += prob[i - offset];
}
// From mode to lower limit
p = pMode;
for (i = mode - 1; i >= offset; i--) {
p *= (i + 1) / mean;
prob[i - offset] = toUnsignedInt30(p);
sum += prob[i - offset];
}
// If the sum is < 2^30 add the remaining sum to the mode.
// If above 2^30 then the effect is truncation of the long tail of the distribution.
prob[mode - offset] += Math.max(0, INT_30 - sum);
return createSampler(rng, POISSON_NAME, prob, offset);
}
}
/**
* Create a sampler for the Binomial distribution.
*/
public static final class Binomial {
/** The name of the Binomial distribution. */
private static final String BINOMIAL_NAME = "Binomial";
/**
* Return a fixed result for the Binomial distribution. This is a special class to handle
* an edge case of probability of success equal to 0 or 1.
*/
private static class MarsagliaTsangWangFixedResultBinomialSampler
extends AbstractMarsagliaTsangWangDiscreteSampler {
/** The result. */
private final int result;
/**
* @param result Result.
*/
MarsagliaTsangWangFixedResultBinomialSampler(int result) {
super(null, BINOMIAL_NAME);
this.result = result;
}
@Override
public int sample() {
return result;
}
@Override
public String toString() {
return BINOMIAL_NAME + " deviate";
}
@Override
public SharedStateDiscreteSampler withUniformRandomProvider(UniformRandomProvider rng) {
// No shared state
return this;
}
}
/**
* Return an inversion result for the Binomial distribution. This assumes the
* following:
*
* <pre>
* Binomial(n, p) = 1 - Binomial(n, 1 - p)
* </pre>
*/
private static class MarsagliaTsangWangInversionBinomialSampler
extends AbstractMarsagliaTsangWangDiscreteSampler {
/** The number of trials. */
private final int trials;
/** The Binomial distribution sampler. */
private final SharedStateDiscreteSampler sampler;
/**
* @param trials Number of trials.
* @param sampler Binomial distribution sampler.
*/
MarsagliaTsangWangInversionBinomialSampler(int trials,
SharedStateDiscreteSampler sampler) {
super(null, BINOMIAL_NAME);
this.trials = trials;
this.sampler = sampler;
}
@Override
public int sample() {
return trials - sampler.sample();
}
@Override
public String toString() {
return sampler.toString();
}
@Override
public SharedStateDiscreteSampler withUniformRandomProvider(UniformRandomProvider rng) {
return new MarsagliaTsangWangInversionBinomialSampler(this.trials,
this.sampler.withUniformRandomProvider(rng));
}
}
/** Class contains only static methods. */
private Binomial() {}
/**
* Creates a sampler for the Binomial distribution.
*
* <p>Any probability less than 2<sup>-31</sup> will not be observed in samples.</p>
*
* <p>Storage requirements depend on the tabulated probability values. Example storage
* requirements are listed below (in kB).</p>
*
* <pre>
* p
* trials 0.5 0.1 0.01 0.001
* 4 0.06 0.63 0.44 0.44
* 16 0.69 1.14 0.76 0.44
* 64 4.73 2.40 1.14 0.51
* 256 8.63 5.17 1.89 0.82
* 1024 31.12 9.45 3.34 0.89
* </pre>
*
* <p>The method requires that the Binomial distribution probability at {@code x=0} can be computed.
* This will fail when {@code (1 - p)^trials == 0} which requires {@code trials} to be large
* and/or {@code p} to be small. In this case an exception is raised.</p>
*
* @param rng Generator of uniformly distributed random numbers.
* @param trials Number of trials.
* @param probabilityOfSuccess Probability of success (p).
* @return Sampler.
* @throws IllegalArgumentException if {@code trials < 0} or {@code trials >= 2^16},
* {@code p} is not in the range {@code [0-1]}, or the probability distribution cannot
* be computed.
*/
public static SharedStateDiscreteSampler of(UniformRandomProvider rng,
int trials,
double probabilityOfSuccess) {
validateBinomialDistributionParameters(trials, probabilityOfSuccess);
// Handle edge cases
if (probabilityOfSuccess == 0) {
return new MarsagliaTsangWangFixedResultBinomialSampler(0);
}
if (probabilityOfSuccess == 1) {
return new MarsagliaTsangWangFixedResultBinomialSampler(trials);
}
// Check the supported size.
if (trials >= INT_16) {
throw new IllegalArgumentException("Unsupported number of trials: " + trials);
}
return createBinomialDistributionSampler(rng, trials, probabilityOfSuccess);
}
/**
* Validate the Binomial distribution parameters.
*
* @param trials Number of trials.
* @param probabilityOfSuccess Probability of success (p).
* @throws IllegalArgumentException if {@code trials < 0} or
* {@code p} is not in the range {@code [0-1]}
*/
private static void validateBinomialDistributionParameters(int trials, double probabilityOfSuccess) {
if (trials < 0) {
throw new IllegalArgumentException("Trials is not positive: " + trials);
}
if (probabilityOfSuccess < 0 || probabilityOfSuccess > 1) {
throw new IllegalArgumentException("Probability is not in range [0,1]: " + probabilityOfSuccess);
}
}
/**
* Creates the Binomial distribution sampler.
*
* <p>This assumes the parameters for the distribution are valid. The method
* will only fail if the initial probability for {@code X=0} is zero.</p>
*
* @param rng Generator of uniformly distributed random numbers.
* @param trials Number of trials.
* @param probabilityOfSuccess Probability of success (p).
* @return Sampler.
* @throws IllegalArgumentException if the probability distribution cannot be
* computed.
*/
private static SharedStateDiscreteSampler createBinomialDistributionSampler(
UniformRandomProvider rng, int trials, double probabilityOfSuccess) {
// The maximum supported value for Math.exp is approximately -744.
// This occurs when trials is large and p is close to 1.
// Handle this by using an inversion: generate j=Binomial(n,1-p), return n-j
final boolean useInversion = probabilityOfSuccess > 0.5;
final double p = useInversion ? 1 - probabilityOfSuccess : probabilityOfSuccess;
// Check if the distribution can be computed
final double p0 = Math.exp(trials * Math.log(1 - p));
if (p0 < Double.MIN_VALUE) {
throw new IllegalArgumentException("Unable to compute distribution");
}
// First find size of probability array
double t = p0;
final double h = p / (1 - p);
// Find first probability above the threshold of 2^-31
int begin = 0;
if (t * DOUBLE_31 < 1) {
// Somewhere after p(0)
// Note:
// If this loop is entered p(0) is < 2^-31.
// This has been tested at the extreme for p(0)=Double.MIN_VALUE and either
// p=0.5 or trials=2^16-1 and does not fail to find the beginning.
for (int i = 1; i <= trials; i++) {
t *= (trials + 1 - i) * h / i;
if (t * DOUBLE_31 >= 1) {
begin = i;
break;
}
}
}
// Find last probability
int end = trials;
for (int i = begin + 1; i <= trials; i++) {
t *= (trials + 1 - i) * h / i;
if (t * DOUBLE_31 < 1) {
end = i - 1;
break;
}
}
return createBinomialDistributionSamplerFromRange(rng, trials, p, useInversion,
p0, begin, end);
}
/**
* Creates the Binomial distribution sampler using only the probability values for {@code X}
* between the begin and the end (inclusive).
*
* @param rng Generator of uniformly distributed random numbers.
* @param trials Number of trials.
* @param p Probability of success (p).
* @param useInversion Set to {@code true} if the probability was inverted.
* @param p0 Probability at {@code X=0}
* @param begin Begin value {@code X} for the distribution.
* @param end End value {@code X} for the distribution.
* @return Sampler.
*/
private static SharedStateDiscreteSampler createBinomialDistributionSamplerFromRange(
UniformRandomProvider rng, int trials, double p,
boolean useInversion, double p0, int begin, int end) {
// Assign probability values as 30-bit integers
final int size = end - begin + 1;
final int[] prob = new int[size];
double t = p0;
final double h = p / (1 - p);
for (int i = 1; i <= begin; i++) {
t *= (trials + 1 - i) * h / i;
}
int sum = toUnsignedInt30(t);
prob[0] = sum;
for (int i = begin + 1; i <= end; i++) {
t *= (trials + 1 - i) * h / i;
prob[i - begin] = toUnsignedInt30(t);
sum += prob[i - begin];
}
// If the sum is < 2^30 add the remaining sum to the mode (floor((n+1)p))).
// If above 2^30 then the effect is truncation of the long tail of the distribution.
final int mode = (int) ((trials + 1) * p) - begin;
prob[mode] += Math.max(0, INT_30 - sum);
final SharedStateDiscreteSampler sampler = createSampler(rng, BINOMIAL_NAME, prob, begin);
// Check if an inversion was made
return useInversion ?
new MarsagliaTsangWangInversionBinomialSampler(trials, sampler) :
sampler;
}
}
}