| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.commons.statistics.distribution; |
| |
| import org.apache.commons.numbers.core.Precision; |
| import org.apache.commons.rng.simple.RandomSource; |
| import org.junit.jupiter.api.Assertions; |
| import org.junit.jupiter.api.BeforeEach; |
| import org.junit.jupiter.api.Test; |
| |
| /** |
| * Test cases for HyperGeometriclDistribution. |
| * Extends DiscreteDistributionAbstractTest. See class javadoc for |
| * DiscreteDistributionAbstractTest for details. |
| */ |
| class HypergeometricDistributionTest extends DiscreteDistributionAbstractTest { |
| |
| //---------------------- Override tolerance -------------------------------- |
| |
| @BeforeEach |
| void customSetUp() { |
| setTolerance(1e-12); |
| } |
| |
| //-------------- Implementations for abstract methods ---------------------- |
| |
| @Override |
| public DiscreteDistribution makeDistribution() { |
| return new HypergeometricDistribution(10, 5, 5); |
| } |
| |
| @Override |
| public int[] makeProbabilityTestPoints() { |
| return new int[] {-1, 0, 1, 2, 3, 4, 5, 10}; |
| } |
| |
| @Override |
| public double[] makeProbabilityTestValues() { |
| // Reference values are from R, version 2.15.3. |
| return new double[] {0d, 0.00396825396825, 0.0992063492063, 0.396825396825, 0.396825396825, |
| 0.0992063492063, 0.00396825396825, 0d}; |
| } |
| |
| @Override |
| public double[] makeLogProbabilityTestValues() { |
| // Reference values are from R, version 2.14.1. |
| //-Inf -Inf |
| return new double[] {Double.NEGATIVE_INFINITY, -5.52942908751142, -2.31055326264322, -0.924258901523332, |
| -0.924258901523332, -2.31055326264322, -5.52942908751142, Double.NEGATIVE_INFINITY}; |
| } |
| |
| @Override |
| public int[] makeCumulativeTestPoints() { |
| return makeProbabilityTestPoints(); |
| } |
| |
| @Override |
| public double[] makeCumulativeTestValues() { |
| // Reference values are from R, version 2.15.3. |
| return new double[] {0d, 0.00396825396825, 0.103174603175, .5, 0.896825396825, 0.996031746032, |
| 1, 1}; |
| } |
| |
| @Override |
| public double[] makeInverseCumulativeTestPoints() { |
| return new double[] {0d, 0.001d, 0.010d, 0.025d, 0.050d, 0.100d, 0.999d, |
| 0.990d, 0.975d, 0.950d, 0.900d, 1d}; |
| } |
| |
| @Override |
| public int[] makeInverseCumulativeTestValues() { |
| return new int[] {0, 0, 1, 1, 1, 1, 5, 4, 4, 4, 4, 5}; |
| } |
| |
| //-------------------- Additional test cases ------------------------------- |
| |
| /** Verify that if there are no failures, mass is concentrated on sampleSize. */ |
| @Test |
| void testDegenerateNoFailures() { |
| final HypergeometricDistribution dist = new HypergeometricDistribution(5, 5, 3); |
| setDistribution(dist); |
| setCumulativeTestPoints(new int[] {-1, 0, 1, 3, 10 }); |
| setCumulativeTestValues(new double[] {0d, 0d, 0d, 1d, 1d}); |
| setProbabilityTestPoints(new int[] {-1, 0, 1, 3, 10}); |
| setProbabilityTestValues(new double[] {0d, 0d, 0d, 1d, 0d}); |
| setInverseCumulativeTestPoints(new double[] {0.1d, 0.5d}); |
| setInverseCumulativeTestValues(new int[] {3, 3}); |
| verifyProbabilities(); |
| verifyLogProbabilities(); |
| verifyCumulativeProbabilities(); |
| verifySurvivalProbability(); |
| verifySurvivalAndCumulativeProbabilityComplement(); |
| verifyInverseCumulativeProbabilities(); |
| Assertions.assertEquals(3, dist.getSupportLowerBound()); |
| Assertions.assertEquals(3, dist.getSupportUpperBound()); |
| } |
| |
| /** Verify that if there are no successes, mass is concentrated on 0 */ |
| @Test |
| void testDegenerateNoSuccesses() { |
| final HypergeometricDistribution dist = new HypergeometricDistribution(5, 0, 3); |
| setDistribution(dist); |
| setCumulativeTestPoints(new int[] {-1, 0, 1, 3, 10 }); |
| setCumulativeTestValues(new double[] {0d, 1d, 1d, 1d, 1d}); |
| setProbabilityTestPoints(new int[] {-1, 0, 1, 3, 10}); |
| setProbabilityTestValues(new double[] {0d, 1d, 0d, 0d, 0d}); |
| setInverseCumulativeTestPoints(new double[] {0.1d, 0.5d}); |
| setInverseCumulativeTestValues(new int[] {0, 0}); |
| verifyProbabilities(); |
| verifyLogProbabilities(); |
| verifyCumulativeProbabilities(); |
| verifySurvivalProbability(); |
| verifySurvivalAndCumulativeProbabilityComplement(); |
| verifyInverseCumulativeProbabilities(); |
| Assertions.assertEquals(0, dist.getSupportLowerBound()); |
| Assertions.assertEquals(0, dist.getSupportUpperBound()); |
| } |
| |
| /** Verify that if sampleSize = populationSize, mass is concentrated on numberOfSuccesses. */ |
| @Test |
| void testDegenerateFullSample() { |
| final HypergeometricDistribution dist = new HypergeometricDistribution(5, 3, 5); |
| setDistribution(dist); |
| setCumulativeTestPoints(new int[] {-1, 0, 1, 3, 10 }); |
| setCumulativeTestValues(new double[] {0d, 0d, 0d, 1d, 1d}); |
| setProbabilityTestPoints(new int[] {-1, 0, 1, 3, 10}); |
| setProbabilityTestValues(new double[] {0d, 0d, 0d, 1d, 0d}); |
| setInverseCumulativeTestPoints(new double[] {0.1d, 0.5d}); |
| setInverseCumulativeTestValues(new int[] {3, 3}); |
| verifyProbabilities(); |
| verifyLogProbabilities(); |
| verifyCumulativeProbabilities(); |
| verifySurvivalProbability(); |
| verifySurvivalAndCumulativeProbabilityComplement(); |
| verifyInverseCumulativeProbabilities(); |
| Assertions.assertEquals(3, dist.getSupportLowerBound()); |
| Assertions.assertEquals(3, dist.getSupportUpperBound()); |
| } |
| |
| @Test |
| void testParameterAccessors() { |
| final HypergeometricDistribution dist = new HypergeometricDistribution(5, 3, 4); |
| Assertions.assertEquals(5, dist.getPopulationSize()); |
| Assertions.assertEquals(3, dist.getNumberOfSuccesses()); |
| Assertions.assertEquals(4, dist.getSampleSize()); |
| } |
| |
| @Test |
| void testConstructorPrecondition1() { |
| Assertions.assertThrows(DistributionException.class, () -> new HypergeometricDistribution(0, 3, 5)); |
| } |
| @Test |
| void testConstructorPrecondition2() { |
| Assertions.assertThrows(DistributionException.class, () -> new HypergeometricDistribution(5, -1, 5)); |
| } |
| @Test |
| void testConstructorPrecondition3() { |
| Assertions.assertThrows(DistributionException.class, () -> new HypergeometricDistribution(5, 3, -1)); |
| } |
| @Test |
| void testConstructorPrecondition4() { |
| Assertions.assertThrows(DistributionException.class, () -> new HypergeometricDistribution(5, 6, 5)); |
| } |
| @Test |
| void testConstructorPrecondition5() { |
| Assertions.assertThrows(DistributionException.class, () -> new HypergeometricDistribution(5, 3, 6)); |
| } |
| |
| @Test |
| void testMoments() { |
| final double tol = 1e-9; |
| HypergeometricDistribution dist; |
| |
| dist = new HypergeometricDistribution(1500, 40, 100); |
| Assertions.assertEquals(40d * 100d / 1500d, dist.getMean(), tol); |
| Assertions.assertEquals((100d * 40d * (1500d - 100d) * (1500d - 40d)) / ((1500d * 1500d * 1499d)), dist.getVariance(), tol); |
| |
| dist = new HypergeometricDistribution(3000, 55, 200); |
| Assertions.assertEquals(55d * 200d / 3000d, dist.getMean(), tol); |
| Assertions.assertEquals((200d * 55d * (3000d - 200d) * (3000d - 55d)) / ((3000d * 3000d * 2999d)), dist.getVariance(), tol); |
| } |
| |
| @Test |
| void testLargeValues() { |
| final int populationSize = 3456; |
| final int sampleSize = 789; |
| final int numberOfSucceses = 101; |
| // data[i][3] contains P(x >= x). |
| // It is tested using survivalProbability(x - 1) |
| final double[][] data = { |
| {0.0, 2.75646034603961e-12, 2.75646034603961e-12, 1.0}, |
| {1.0, 8.55705370142386e-11, 8.83269973602783e-11, 0.999999999997244}, |
| {2.0, 1.31288129219665e-9, 1.40120828955693e-9, 0.999999999911673}, |
| {3.0, 1.32724172984193e-8, 1.46736255879763e-8, 0.999999998598792}, |
| {4.0, 9.94501711734089e-8, 1.14123796761385e-7, 0.999999985326375}, |
| {5.0, 5.89080768883643e-7, 7.03204565645028e-7, 0.999999885876203}, |
| {20.0, 0.0760051397707708, 0.27349758476299, 0.802507555007781}, |
| {21.0, 0.087144222047629, 0.360641806810619, 0.72650241523701}, |
| {22.0, 0.0940378846881819, 0.454679691498801, 0.639358193189381}, |
| {23.0, 0.0956897500614809, 0.550369441560282, 0.545320308501199}, |
| {24.0, 0.0919766921922999, 0.642346133752582, 0.449630558439718}, |
| {25.0, 0.083641637261095, 0.725987771013677, 0.357653866247418}, |
| {96.0, 5.93849188852098e-57, 1.0, 6.01900244560712e-57}, |
| {97.0, 7.96593036832547e-59, 1.0, 8.05105570861321e-59}, |
| {98.0, 8.44582921934367e-61, 1.0, 8.5125340287733e-61}, |
| {99.0, 6.63604297068222e-63, 1.0, 6.670480942963e-63}, |
| {100.0, 3.43501099007557e-65, 1.0, 3.4437972280786e-65}, |
| {101.0, 8.78623800302957e-68, 1.0, 8.78623800302957e-68}, |
| // Out of domain |
| {sampleSize + 1, 0, 1.0, 0}, |
| }; |
| |
| testHypergeometricDistributionProbabilities(populationSize, sampleSize, numberOfSucceses, data); |
| } |
| |
| private static void testHypergeometricDistributionProbabilities(int populationSize, int sampleSize, |
| int numberOfSucceses, double[][] data) { |
| final HypergeometricDistribution dist = new HypergeometricDistribution(populationSize, numberOfSucceses, sampleSize); |
| for (int i = 0; i < data.length; ++i) { |
| final int x = (int)data[i][0]; |
| final double pmf = data[i][1]; |
| final double actualPmf = dist.probability(x); |
| TestUtils.assertRelativelyEquals(() -> "Expected equals for <" + x + "> pmf", pmf, actualPmf, 1.0e-9); |
| |
| final double cdf = data[i][2]; |
| final double actualCdf = dist.cumulativeProbability(x); |
| TestUtils.assertRelativelyEquals(() -> "Expected equals for <" + x + "> cdf", cdf, actualCdf, 1.0e-9); |
| |
| final double cdf1 = data[i][3]; |
| final double actualCdf1 = dist.survivalProbability(x - 1); |
| TestUtils.assertRelativelyEquals(() -> "Expected equals for <" + x + "> cdf1", cdf1, actualCdf1, 1.0e-9); |
| } |
| } |
| |
| @Test |
| void testMoreLargeValues() { |
| final int populationSize = 26896; |
| final int sampleSize = 895; |
| final int numberOfSucceses = 55; |
| final double[][] data = { |
| {0.0, 0.155168304750504, 0.155168304750504, 1.0}, |
| {1.0, 0.29437545000746, 0.449543754757964, 0.844831695249496}, |
| {2.0, 0.273841321577003, 0.723385076334967, 0.550456245242036}, |
| {3.0, 0.166488572570786, 0.889873648905753, 0.276614923665033}, |
| {4.0, 0.0743969744713231, 0.964270623377076, 0.110126351094247}, |
| {5.0, 0.0260542785784855, 0.990324901955562, 0.0357293766229237}, |
| {20.0, 3.57101101678792e-16, 1.0, 3.78252101622096e-16}, |
| {21.0, 2.00551638598312e-17, 1.0, 2.11509999433041e-17}, |
| {22.0, 1.04317070180562e-18, 1.0, 1.09583608347287e-18}, |
| {23.0, 5.03153504903308e-20, 1.0, 5.266538166725e-20}, |
| {24.0, 2.2525984149695e-21, 1.0, 2.35003117691919e-21}, |
| {25.0, 9.3677424515947e-23, 1.0, 9.74327619496943e-23}, |
| {50.0, 9.83633962945521e-69, 1.0, 9.8677629437617e-69}, |
| {51.0, 3.13448949497553e-71, 1.0, 3.14233143064882e-71}, |
| {52.0, 7.82755221928122e-74, 1.0, 7.84193567329055e-74}, |
| {53.0, 1.43662126065532e-76, 1.0, 1.43834540093295e-76}, |
| {54.0, 1.72312692517348e-79, 1.0, 1.7241402776278e-79}, |
| {55.0, 1.01335245432581e-82, 1.0, 1.01335245432581e-82}, |
| }; |
| testHypergeometricDistributionProbabilities(populationSize, sampleSize, numberOfSucceses, data); |
| } |
| |
| /** |
| * Test Math-644 is ported from Commons Math 3 where the distribution had the function |
| * upperCumulativeProbability(x) to compute P(X >= x). This has been replaced |
| * in Commons Statistics with survivalProbability(x) which computes P(X > x). To |
| * create the equivalent use survivalProbability(x - 1). |
| */ |
| @Test |
| void testMath644() { |
| final int N = 14761461; // population |
| final int m = 1035; // successes in population |
| final int n = 1841; // number of trials |
| |
| final int k = 0; |
| final HypergeometricDistribution dist = new HypergeometricDistribution(N, m, n); |
| |
| // Compute upper cumulative probability using the survival probability |
| Assertions.assertEquals(0, Precision.compareTo(1.0, dist.survivalProbability(k - 1), 1)); |
| Assertions.assertTrue(Precision.compareTo(dist.cumulativeProbability(k), 0.0, 1) > 0); |
| |
| // another way to calculate the upper cumulative probability |
| final double upper = 1.0 - dist.cumulativeProbability(k) + dist.probability(k); |
| Assertions.assertEquals(0, Precision.compareTo(1.0, upper, 1)); |
| } |
| |
| @Test |
| void testZeroTrial() { |
| final int n = 11; // population |
| final int m = 4; // successes in population |
| final int s = 0; // number of trials |
| |
| final HypergeometricDistribution dist = new HypergeometricDistribution(n, m, 0); |
| |
| for (int i = 1; i <= n; i++) { |
| final double p = dist.probability(i); |
| Assertions.assertEquals(0, p, () -> "p=" + p); |
| } |
| } |
| |
| @Test |
| void testMath1356() { |
| final int n = 11; // population |
| final int m = 11; // successes in population |
| |
| for (int s = 0; s <= n; s++) { |
| final HypergeometricDistribution dist = new HypergeometricDistribution(n, m, s); |
| final double p = dist.probability(s); |
| Assertions.assertEquals(1, p, () -> "p=" + p); |
| } |
| } |
| |
| @Test |
| void testMath1021() { |
| final int N = 43130568; |
| final int m = 42976365; |
| final int n = 50; |
| final DiscreteDistribution.Sampler dist = |
| new HypergeometricDistribution(N, m, n).createSampler(RandomSource.create(RandomSource.WELL_512_A)); |
| |
| for (int i = 0; i < 100; i++) { |
| final int sample = dist.sample(); |
| Assertions.assertTrue(0 <= sample, () -> "sample=" + sample); |
| Assertions.assertTrue(sample <= n, () -> "sample=" + sample); |
| } |
| } |
| |
| @Test |
| void testHighPrecisionCumulativeProbabilities() { |
| // computed using R version 3.4.4 |
| setDistribution(new HypergeometricDistribution(500, 70, 300)); |
| setCumulativePrecisionTestPoints(new int[] {10, 8}); |
| setCumulativePrecisionTestValues(new double[] {2.4055720603264525e-17, 1.2848174992266236e-19}); |
| verifySurvivalProbabilityPrecision(); |
| } |
| |
| @Test |
| void testHighPrecisionSurvivalProbabilities() { |
| // computed using R version 3.4.4 |
| setDistribution(new HypergeometricDistribution(500, 70, 300)); |
| setSurvivalPrecisionTestPoints(new int[] {68, 69}); |
| setSurvivalPrecisionTestValues(new double[] {4.570379934029859e-16, 7.4187180434325268e-18}); |
| verifySurvivalProbabilityPrecision(); |
| } |
| } |