Merge in master

commit: 406a41d33cacf63d885958e2a73b1d7dbd8a6abd [log] [tgz]
author: Rob Tompkins <chtompki@gmail.com> Mon Aug 06 08:16:21 2018 -0400
committer: Rob Tompkins <chtompki@gmail.com> Mon Aug 06 08:16:21 2018 -0400
tree: 57b22371f18848a659cc8a4159840c226c46d8f7
parent: f73546863611e5dd850fd311277f2019259489ce [diff]
parent: 2a31d8764facb77c8a3c301b0613078df8c93d13 [diff]
diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.java
new file mode 100644
index 0000000..a729686
--- /dev/null
+++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/LargeMeanPoissonSampler.java

@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.rng.sampling.distribution;
+
+import org.apache.commons.rng.UniformRandomProvider;
+import org.apache.commons.rng.sampling.distribution.InternalUtils.FactorialLog;
+
+/**
+ * Sampler for the <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson distribution</a>.
+ *
+ * <ul>
+ *  <li>
+ *   For large means, we use the rejection algorithm described in
+ *   <blockquote>
+ *    Devroye, Luc. (1981).<i>The Computer Generation of Poisson Random Variables</i><br>
+ *    <strong>Computing</strong> vol. 26 pp. 197-207.
+ *   </blockquote>
+ *  </li>
+ * </ul>
+ *
+ * This sampler is suitable for {@code mean >= 40}.
+ */
+public class LargeMeanPoissonSampler
+    extends SamplerBase
+    implements DiscreteSampler {
+
+    /** Class to compute {@code log(n!)}. This has no cached values. */
+    private static final InternalUtils.FactorialLog NO_CACHE_FACTORIAL_LOG;
+
+    static {
+        // Create without a cache.
+        NO_CACHE_FACTORIAL_LOG = FactorialLog.create();
+    }
+
+    /** Exponential. */
+    private final ContinuousSampler exponential;
+    /** Gaussian. */
+    private final ContinuousSampler gaussian;
+    /** Local class to compute {@code log(n!)}. This may have cached values. */
+    private final InternalUtils.FactorialLog factorialLog;
+
+    // Working values
+
+    /** Algorithm constant: {@code Math.floor(mean)}. */
+    private final double lambda;
+    /** Algorithm constant: {@code mean - lambda}. */
+    private final double lambdaFractional;
+    /** Algorithm constant: {@code Math.log(lambda)}. */
+    private final double logLambda;
+    /** Algorithm constant: {@code factorialLog((int) lambda)}. */
+    private final double logLambdaFactorial;
+    /** Algorithm constant: {@code Math.sqrt(lambda * Math.log(32 * lambda / Math.PI + 1))}. */
+    private final double delta;
+    /** Algorithm constant: {@code delta / 2}. */
+    private final double halfDelta;
+    /** Algorithm constant: {@code 2 * lambda + delta}. */
+    private final double twolpd;
+    /**
+     * Algorithm constant: {@code a1 / aSum} with
+     * <ul>
+     *  <li>{@code a1 = Math.sqrt(Math.PI * twolpd) * Math.exp(c1)}</li>
+     *  <li>{@code aSum = a1 + a2 + 1}</li>
+     * </ul>
+     */
+    private final double p1;
+    /**
+     * Algorithm constant: {@code a1 / aSum} with
+     * <ul>
+     *  <li>{@code a2 = (twolpd / delta) * Math.exp(-delta * (1 + delta) / twolpd)}</li>
+     *  <li>{@code aSum = a1 + a2 + 1}</li>
+     * </ul>
+     */
+    private final double p2;
+    /** Algorithm constant: {@code 1 / (8 * lambda)}. */
+    private final double c1;
+
+    /** The internal Poisson sampler for the lambda fraction. */
+    private final DiscreteSampler smallMeanPoissonSampler;
+
+    /**
+     * @param rng  Generator of uniformly distributed random numbers.
+     * @param mean Mean.
+     * @throws IllegalArgumentException if {@code mean <= 0}.
+     */
+    public LargeMeanPoissonSampler(UniformRandomProvider rng,
+                                   double mean) {
+        super(rng);
+        if (mean <= 0) {
+            throw new IllegalArgumentException(mean + " <= " + 0);
+        }
+
+        gaussian = new ZigguratNormalizedGaussianSampler(rng);
+        exponential = new AhrensDieterExponentialSampler(rng, 1);
+        // Plain constructor uses the uncached function.
+        factorialLog = NO_CACHE_FACTORIAL_LOG;
+
+        // Cache values used in the algorithm
+        lambda = Math.floor(mean);
+        lambdaFractional = mean - lambda;
+        logLambda = Math.log(lambda);
+        logLambdaFactorial = factorialLog((int) lambda);
+        delta = Math.sqrt(lambda * Math.log(32 * lambda / Math.PI + 1));
+        halfDelta = delta / 2;
+        twolpd = 2 * lambda + delta;
+        c1 = 1 / (8 * lambda);
+        final double a1 = Math.sqrt(Math.PI * twolpd) * Math.exp(c1);
+        final double a2 = (twolpd / delta) * Math.exp(-delta * (1 + delta) / twolpd);
+        final double aSum = a1 + a2 + 1;
+        p1 = a1 / aSum;
+        p2 = a2 / aSum;
+
+        // The algorithm requires a Poisson sample from the remaining lambda fraction.
+        smallMeanPoissonSampler = (lambdaFractional < Double.MIN_VALUE) ?
+            null : // Not used.
+            new SmallMeanPoissonSampler(rng, lambdaFractional);
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public int sample() {
+
+        final int y2 = (smallMeanPoissonSampler == null) ?
+            0 : // No lambda fraction
+            smallMeanPoissonSampler.sample();
+
+        double x = 0;
+        double y = 0;
+        double v = 0;
+        int a = 0;
+        double t = 0;
+        double qr = 0;
+        double qa = 0;
+        while (true) {
+            final double u = nextDouble();
+            if (u <= p1) {
+                final double n = gaussian.sample();
+                x = n * Math.sqrt(lambda + halfDelta) - 0.5d;
+                if (x > delta || x < -lambda) {
+                    continue;
+                }
+                y = x < 0 ? Math.floor(x) : Math.ceil(x);
+                final double e = exponential.sample();
+                v = -e - 0.5 * n * n + c1;
+            } else {
+                if (u > p1 + p2) {
+                    y = lambda;
+                    break;
+                }
+                x = delta + (twolpd / delta) * exponential.sample();
+                y = Math.ceil(x);
+                v = -exponential.sample() - delta * (x + 1) / twolpd;
+            }
+            a = x < 0 ? 1 : 0;
+            t = y * (y + 1) / (2 * lambda);
+            if (v < -t && a == 0) {
+                y = lambda + y;
+                break;
+            }
+            qr = t * ((2 * y + 1) / (6 * lambda) - 1);
+            qa = qr - (t * t) / (3 * (lambda + a * (y + 1)));
+            if (v < qa) {
+                y = lambda + y;
+                break;
+            }
+            if (v > qr) {
+                continue;
+            }
+            if (v < y * logLambda - factorialLog((int) (y + lambda)) + logLambdaFactorial) {
+                y = lambda + y;
+                break;
+            }
+        }
+
+        return (int) Math.min(y2 + (long) y, Integer.MAX_VALUE);
+    }
+
+    /**
+     * Compute the natural logarithm of the factorial of {@code n}.
+     *
+     * @param n Argument.
+     * @return {@code log(n!)}
+     * @throws IllegalArgumentException if {@code n < 0}.
+     */
+    private double factorialLog(int n) {
+        return factorialLog.value(n);
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public String toString() {
+        return "Large Mean Poisson deviate [" + super.toString() + "]";
+    }
+}

diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/PoissonSampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/PoissonSampler.java
index 195e725..cd9187a 100644
--- a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/PoissonSampler.java
+++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/PoissonSampler.java

@@ -30,25 +30,19 @@
  *  <li>
  *   For large means, we use the rejection algorithm described in
  *   <blockquote>
- *    Devroye, Luc. (1981).<i>The Computer Generation of Poisson Random Variables</i><br>
+ *    Devroye, Luc. (1981). <i>The Computer Generation of Poisson Random Variables</i><br>
  *    <strong>Computing</strong> vol. 26 pp. 197-207.
  *   </blockquote>
  *  </li>
  * </ul>
  */
 public class PoissonSampler
-    extends SamplerBase
     implements DiscreteSampler {
+
     /** Value for switching sampling algorithm. */
     private static final double PIVOT = 40;
-    /** Mean of the distribution. */
-    private final double mean;
-    /** Exponential. */
-    private final ContinuousSampler exponential;
-    /** Gaussian. */
-    private final NormalizedGaussianSampler gaussian;
-    /** {@code log(n!)}. */
-    private final InternalUtils.FactorialLog factorialLog;
+    /** The internal Poisson sampler. */
+    private final DiscreteSampler poissonSampler;
 
     /**
      * @param rng Generator of uniformly distributed random numbers.
@@ -57,24 +51,17 @@
      */
     public PoissonSampler(UniformRandomProvider rng,
                           double mean) {
-        super(rng);
-        if (mean <= 0) {
-            throw new IllegalArgumentException(mean + " <= " + 0);
-        }
-
-        this.mean = mean;
-
-        gaussian = new ZigguratNormalizedGaussianSampler(rng);
-        exponential = new AhrensDieterExponentialSampler(rng, 1);
-        factorialLog = mean < PIVOT ?
-            null : // Not used.
-            InternalUtils.FactorialLog.create().withCache((int) Math.min(mean, 2 * PIVOT));
+        // Delegate all work to specialised samplers.
+        // These should check the input arguments.
+        poissonSampler = mean < PIVOT ?
+            new SmallMeanPoissonSampler(rng, mean) :
+            new LargeMeanPoissonSampler(rng, mean);
     }
 
     /** {@inheritDoc} */
     @Override
     public int sample() {
-        return (int) Math.min(nextPoisson(mean), Integer.MAX_VALUE);
+        return poissonSampler.sample();
     }
 
     /** {@inheritDoc} */
@@ -82,103 +69,4 @@
     public String toString() {
         return "Poisson deviate [" + super.toString() + "]";
     }
-
-    /**
-     * @param meanPoisson Mean.
-     * @return the next sample.
-     */
-    private long nextPoisson(double meanPoisson) {
-        if (meanPoisson < PIVOT) {
-            double p = Math.exp(-meanPoisson);
-            long n = 0;
-            double r = 1;
-
-            while (n < 1000 * meanPoisson) {
-                r *= nextDouble();
-                if (r >= p) {
-                    n++;
-                } else {
-                    break;
-                }
-            }
-            return n;
-        } else {
-            final double lambda = Math.floor(meanPoisson);
-            final double lambdaFractional = meanPoisson - lambda;
-            final double logLambda = Math.log(lambda);
-            final double logLambdaFactorial = factorialLog((int) lambda);
-            final long y2 = lambdaFractional < Double.MIN_VALUE ? 0 : nextPoisson(lambdaFractional);
-            final double delta = Math.sqrt(lambda * Math.log(32 * lambda / Math.PI + 1));
-            final double halfDelta = delta / 2;
-            final double twolpd = 2 * lambda + delta;
-            final double a1 = Math.sqrt(Math.PI * twolpd) * Math.exp(1 / (8 * lambda));
-            final double a2 = (twolpd / delta) * Math.exp(-delta * (1 + delta) / twolpd);
-            final double aSum = a1 + a2 + 1;
-            final double p1 = a1 / aSum;
-            final double p2 = a2 / aSum;
-            final double c1 = 1 / (8 * lambda);
-
-            double x;
-            double y;
-            double v;
-            int a;
-            double t;
-            double qr;
-            double qa;
-            while (true) {
-                final double u = nextDouble();
-                if (u <= p1) {
-                    final double n = gaussian.sample();
-                    x = n * Math.sqrt(lambda + halfDelta) - 0.5;
-                    if (x > delta ||
-                        x < -lambda) {
-                        continue;
-                    }
-                    y = x < 0 ? Math.floor(x) : Math.ceil(x);
-                    final double e = exponential.sample();
-                    v = -e - 0.5 * n * n + c1;
-                } else {
-                    if (u > p1 + p2) {
-                        y = lambda;
-                        break;
-                    } else {
-                        x = delta + (twolpd / delta) * exponential.sample();
-                        y = Math.ceil(x);
-                        v = -exponential.sample() - delta * (x + 1) / twolpd;
-                    }
-                }
-                a = x < 0 ? 1 : 0;
-                t = y * (y + 1) / (2 * lambda);
-                if (v < -t && a == 0) {
-                    y = lambda + y;
-                    break;
-                }
-                qr = t * ((2 * y + 1) / (6 * lambda) - 1);
-                qa = qr - (t * t) / (3 * (lambda + a * (y + 1)));
-                if (v < qa) {
-                    y = lambda + y;
-                    break;
-                }
-                if (v > qr) {
-                    continue;
-                }
-                if (v < y * logLambda - factorialLog((int) (y + lambda)) + logLambdaFactorial) {
-                    y = lambda + y;
-                    break;
-                }
-            }
-            return y2 + (long) y;
-        }
-    }
-
-    /**
-     * Compute the natural logarithm of the factorial of {@code n}.
-     *
-     * @param n Argument.
-     * @return {@code log(n!)}
-     * @throws IllegalArgumentException if {@code n < 0}.
-     */
-    private double factorialLog(int n) {
-        return factorialLog.value(n);
-    }
 }

diff --git a/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/SmallMeanPoissonSampler.java b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/SmallMeanPoissonSampler.java
new file mode 100644
index 0000000..283594e
--- /dev/null
+++ b/commons-rng-sampling/src/main/java/org/apache/commons/rng/sampling/distribution/SmallMeanPoissonSampler.java

@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.commons.rng.sampling.distribution;
+
+import org.apache.commons.rng.UniformRandomProvider;
+
+/**
+ * Sampler for the <a href="http://mathworld.wolfram.com/PoissonDistribution.html">Poisson distribution</a>.
+ *
+ * <ul>
+ *  <li>
+ *   For small means, a Poisson process is simulated using uniform deviates, as
+ *   described <a href="http://mathaa.epfl.ch/cours/PMMI2001/interactive/rng7.htm">here</a>.
+ *   The Poisson process (and hence, the returned value) is bounded by 1000 * mean.
+ *  </li>
+ * </ul>
+ *
+ * This sampler is suitable for {@code mean < 40}.
+ */
+public class SmallMeanPoissonSampler
+    extends SamplerBase
+    implements DiscreteSampler {
+
+    /**
+     * Pre-compute {@code Math.exp(-mean)}.
+     * Note: This is the probability of the Poisson sample {@code P(n=0)}.
+     */
+    private final double p0;
+    /** Pre-compute {@code 1000 * mean} as the upper limit of the sample. */
+    private final int limit;
+
+    /**
+     * @param rng  Generator of uniformly distributed random numbers.
+     * @param mean Mean.
+     * @throws IllegalArgumentException if {@code mean <= 0}.
+     */
+    public SmallMeanPoissonSampler(UniformRandomProvider rng,
+                                   double mean) {
+        super(rng);
+        if (mean <= 0) {
+            throw new IllegalArgumentException(mean + " <= " + 0);
+        }
+
+        p0 = Math.exp(-mean);
+        // The returned sample is bounded by 1000 * mean or Integer.MAX_VALUE
+        limit = (int) Math.ceil(Math.min(1000 * mean, Integer.MAX_VALUE));
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public int sample() {
+        int n = 0;
+        double r = 1;
+
+        while (n < limit) {
+            r *= nextDouble();
+            if (r >= p0) {
+                n++;
+            } else {
+                break;
+            }
+        }
+        return n;
+    }
+
+    /** {@inheritDoc} */
+    @Override
+    public String toString() {
+        return "Small Mean Poisson deviate [" + super.toString() + "]";
+    }
+}

diff --git a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/DiscreteSamplersList.java b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/DiscreteSamplersList.java
index 2c02bdb..23122c7 100644
--- a/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/DiscreteSamplersList.java
+++ b/commons-rng-sampling/src/test/java/org/apache/commons/rng/sampling/distribution/DiscreteSamplersList.java

@@ -16,7 +16,6 @@
  */
 package org.apache.commons.rng.sampling.distribution;
 
-import java.util.Arrays;
 import java.util.List;
 import java.util.ArrayList;
 import java.util.Collections;
@@ -109,16 +108,28 @@
             add(LIST, new org.apache.commons.math3.distribution.PoissonDistribution(meanPoisson),
                 MathArrays.sequence(10, 0, 1),
                 new PoissonSampler(RandomSource.create(RandomSource.KISS), meanPoisson));
+            // Dedicated small mean poisson sampler
+            add(LIST, new org.apache.commons.math3.distribution.PoissonDistribution(meanPoisson),
+                MathArrays.sequence(10, 0, 1),
+                new SmallMeanPoissonSampler(RandomSource.create(RandomSource.KISS), meanPoisson));
             // Poisson (40 < mean < 80).
             final double largeMeanPoisson = 67.89;
             add(LIST, new org.apache.commons.math3.distribution.PoissonDistribution(largeMeanPoisson),
                 MathArrays.sequence(50, (int) (largeMeanPoisson - 25), 1),
                 new PoissonSampler(RandomSource.create(RandomSource.SPLIT_MIX_64), largeMeanPoisson));
+            // Dedicated large mean poisson sampler
+            add(LIST, new org.apache.commons.math3.distribution.PoissonDistribution(largeMeanPoisson),
+                MathArrays.sequence(50, (int) (largeMeanPoisson - 25), 1),
+                new LargeMeanPoissonSampler(RandomSource.create(RandomSource.SPLIT_MIX_64), largeMeanPoisson));
             // Poisson (mean >> 40).
             final double veryLargeMeanPoisson = 543.21;
             add(LIST, new org.apache.commons.math3.distribution.PoissonDistribution(veryLargeMeanPoisson),
                 MathArrays.sequence(100, (int) (veryLargeMeanPoisson - 50), 1),
                 new PoissonSampler(RandomSource.create(RandomSource.SPLIT_MIX_64), veryLargeMeanPoisson));
+            // Dedicated large mean poisson sampler
+            add(LIST, new org.apache.commons.math3.distribution.PoissonDistribution(veryLargeMeanPoisson),
+                MathArrays.sequence(100, (int) (veryLargeMeanPoisson - 50), 1),
+                new LargeMeanPoissonSampler(RandomSource.create(RandomSource.SPLIT_MIX_64), veryLargeMeanPoisson));
         } catch (Exception e) {
             System.err.println("Unexpected exception while creating the list of samplers: " + e);
             e.printStackTrace(System.err);

diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 8901f17..8fba90c 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml

@@ -51,7 +51,7 @@
   </properties>
   <body>
 
-    <release version="1.1" date="2018-08-05" description="
+    <release version="1.1" date="2018-08-06" description="
 This is a minor release of Apache Commons RNG, containing a
 few new features and performance improvements.
 
@@ -76,6 +76,12 @@
 to re-run tests that fail and pass the build if they succeed (the test will
 be marked as 'flaky' in the report).
 ">
+      <action dev="erans" type="update" issue="RNG-50" due-to="Alex D. Herbert">
+	"PoissonSampler": Algorithms for small mean and large mean have
+	been separated into dedicated classes.  Cache precomputation has
+	been disabled as it is only marginally used and is a performance
+	hit for small sampling sets.
+      </action>
       <action dev="erans" type="add" issue="RNG-37">
         Implementation of the "Ziggurat" algorithm for Gaussian sampling.
       </action>
commit	406a41d33cacf63d885958e2a73b1d7dbd8a6abd	[log] [tgz]
author	Rob Tompkins <chtompki@gmail.com>	Mon Aug 06 08:16:21 2018 -0400
committer	Rob Tompkins <chtompki@gmail.com>	Mon Aug 06 08:16:21 2018 -0400
tree	57b22371f18848a659cc8a4159840c226c46d8f7
parent	f73546863611e5dd850fd311277f2019259489ce [diff]
parent	2a31d8764facb77c8a3c301b0613078df8c93d13 [diff]