commons-statistics-distribution/src/main/java/org/apache/commons/statistics/distribution/HypergeometricDistribution.java - commons-statistics - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.commons.statistics.distribution;

 /**
  * Implementation of the <a href="http://en.wikipedia.org/wiki/Hypergeometric_distribution">hypergeometric distribution</a>.
  */
 public class HypergeometricDistribution extends AbstractDiscreteDistribution {
     /** The number of successes in the population. */
     private final int numberOfSuccesses;
     /** The population size. */
     private final int populationSize;
     /** The sample size. */
     private final int sampleSize;
     /** The lower bound of the support (inclusive). */
     private final int lowerBound;
     /** The upper bound of the support (inclusive). */
     private final int upperBound;

     /**
      * Creates a new hypergeometric distribution.
      *
      * @param populationSize Population size.
      * @param numberOfSuccesses Number of successes in the population.
      * @param sampleSize Sample size.
      * @throws IllegalArgumentException if {@code numberOfSuccesses < 0}, or
      * {@code populationSize <= 0} or {@code numberOfSuccesses > populationSize},
      * or {@code sampleSize > populationSize}.
      */
     public HypergeometricDistribution(int populationSize,
                                       int numberOfSuccesses,
                                       int sampleSize) {
         if (populationSize <= 0) {
             throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE,
                                             populationSize);
         }
         if (numberOfSuccesses < 0) {
             throw new DistributionException(DistributionException.NEGATIVE,
                                             numberOfSuccesses);
         }
         if (sampleSize < 0) {
             throw new DistributionException(DistributionException.NEGATIVE,
                                             sampleSize);
         }

         if (numberOfSuccesses > populationSize) {
             throw new DistributionException(DistributionException.TOO_LARGE,
                                             numberOfSuccesses, populationSize);
         }
         if (sampleSize > populationSize) {
             throw new DistributionException(DistributionException.TOO_LARGE,
                                             sampleSize, populationSize);
         }

         this.numberOfSuccesses = numberOfSuccesses;
         this.populationSize = populationSize;
         this.sampleSize = sampleSize;
         lowerBound = getLowerDomain(populationSize, numberOfSuccesses, sampleSize);
         upperBound = getUpperDomain(numberOfSuccesses, sampleSize);
     }

     /**
      * Return the lowest domain value for the given hypergeometric distribution
      * parameters.
      *
      * @param n Population size.
      * @param m Number of successes in the population.
      * @param k Sample size.
      * @return the lowest domain value of the hypergeometric distribution.
      */
     private static int getLowerDomain(int n, int m, int k) {
         return Math.max(0, m - (n - k));
     }

     /**
      * Return the highest domain value for the given hypergeometric distribution
      * parameters.
      *
      * @param m Number of successes in the population.
      * @param k Sample size.
      * @return the highest domain value of the hypergeometric distribution.
      */
     private static int getUpperDomain(int m, int k) {
         return Math.min(k, m);
     }

     /**
      * Access the number of successes.
      *
      * @return the number of successes.
      */
     public int getNumberOfSuccesses() {
         return numberOfSuccesses;
     }

     /**
      * Access the population size.
      *
      * @return the population size.
      */
     public int getPopulationSize() {
         return populationSize;
     }

     /**
      * Access the sample size.
      *
      * @return the sample size.
      */
     public int getSampleSize() {
         return sampleSize;
     }

     /** {@inheritDoc} */
     @Override
     public double probability(int x) {
         return Math.exp(logProbability(x));
     }

     /** {@inheritDoc} */
     @Override
     public double logProbability(int x) {
         if (x < lowerBound || x > upperBound) {
             return Double.NEGATIVE_INFINITY;
         }
         final double p = (double) sampleSize / (double) populationSize;
         final double q = (double) (populationSize - sampleSize) / (double) populationSize;
         return logProbability(x, p, q);
     }

     /**
      * Compute the log probability.
      *
      * @param x Value.
      * @param p sample size / population size.
      * @param q (population size - sample size) / population size
      * @return log(P(X = x))
      */
     private double logProbability(int x, double p, double q) {
         final double p1 =
                 SaddlePointExpansionUtils.logBinomialProbability(x, numberOfSuccesses, p, q);
         final double p2 =
                 SaddlePointExpansionUtils.logBinomialProbability(sampleSize - x,
                         populationSize - numberOfSuccesses, p, q);
         final double p3 =
                 SaddlePointExpansionUtils.logBinomialProbability(sampleSize, populationSize, p, q);
         return p1 + p2 - p3;
     }

     /** {@inheritDoc} */
     @Override
     public double cumulativeProbability(int x) {
         if (x < lowerBound) {
             return 0.0;
         } else if (x >= upperBound) {
             return 1.0;
         }
         return innerCumulativeProbability(lowerBound, x);
     }

     /** {@inheritDoc} */
     @Override
     public double survivalProbability(int x) {
         if (x < lowerBound) {
             return 1.0;
         } else if (x >= upperBound) {
             return 0.0;
         }
         return innerCumulativeProbability(upperBound, x + 1);
     }

     /**
      * For this distribution, {@code X}, this method returns
      * {@code P(x0 <= X <= x1)}.
      * This probability is computed by summing the point probabilities for the
      * values {@code x0, x0 + dx, x0 + 2 * dx, ..., x1}; the direction {@code dx} is determined
      * using a comparison of the input bounds.
      * This should be called by using {@code x0} as the domain limit and {@code x1}
      * as the internal value. This will result in an initial sum of increasing larger magnitudes.
      *
      * @param x0 Inclusive domain bound.
      * @param x1 Inclusive internal bound.
      * @return {@code P(x0 <= X <= x1)}.
      */
     private double innerCumulativeProbability(int x0, int x1) {
         // Assume the range is within the domain.
         // Reuse the computation for probability(x) but avoid checking the domain for each call.
         final double p = (double) sampleSize / (double) populationSize;
         final double q = (double) (populationSize - sampleSize) / (double) populationSize;
         int x = x0;
         double ret = Math.exp(logProbability(x, p, q));
         if (x0 < x1) {
             while (x != x1) {
                 x++;
                 ret += Math.exp(logProbability(x, p, q));
             }
         } else {
             while (x != x1) {
                 x--;
                 ret += Math.exp(logProbability(x, p, q));
             }
         }
         return ret;
     }

     /**
      * {@inheritDoc}
      *
      * For population size {@code N}, number of successes {@code m}, and sample
      * size {@code n}, the mean is {@code n * m / N}.
      */
     @Override
     public double getMean() {
         return getSampleSize() * (getNumberOfSuccesses() / (double) getPopulationSize());
     }

     /**
      * {@inheritDoc}
      *
      * For population size {@code N}, number of successes {@code m}, and sample
      * size {@code n}, the variance is
      * {@code (n * m * (N - n) * (N - m)) / (N^2 * (N - 1))}.
      */
     @Override
     public double getVariance() {
         final double N = getPopulationSize();
         final double m = getNumberOfSuccesses();
         final double n = getSampleSize();
         return (n * m * (N - n) * (N - m)) / (N * N * (N - 1));
     }

     /**
      * {@inheritDoc}
      *
      * For population size {@code N}, number of successes {@code m}, and sample
      * size {@code n}, the lower bound of the support is
      * {@code max(0, n + m - N)}.
      *
      * @return lower bound of the support
      */
     @Override
     public int getSupportLowerBound() {
         return lowerBound;
     }

     /**
      * {@inheritDoc}
      *
      * For number of successes {@code m} and sample size {@code n}, the upper
      * bound of the support is {@code min(m, n)}.
      *
      * @return upper bound of the support
      */
     @Override
     public int getSupportUpperBound() {
         return upperBound;
     }

     /**
      * {@inheritDoc}
      *
      * The support of this distribution is connected.
      *
      * @return {@code true}
      */
     @Override
     public boolean isSupportConnected() {
         return true;
     }
 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package org.apache.commons.statistics.distribution;

	/**
	* Implementation of the <a href="http://en.wikipedia.org/wiki/Hypergeometric_distribution">hypergeometric distribution</a>.
	*/
	public class HypergeometricDistribution extends AbstractDiscreteDistribution {
	/** The number of successes in the population. */
	private final int numberOfSuccesses;
	/** The population size. */
	private final int populationSize;
	/** The sample size. */
	private final int sampleSize;
	/** The lower bound of the support (inclusive). */
	private final int lowerBound;
	/** The upper bound of the support (inclusive). */
	private final int upperBound;

	/**
	* Creates a new hypergeometric distribution.
	*
	* @param populationSize Population size.
	* @param numberOfSuccesses Number of successes in the population.
	* @param sampleSize Sample size.
	* @throws IllegalArgumentException if {@code numberOfSuccesses < 0}, or
	* {@code populationSize <= 0} or {@code numberOfSuccesses > populationSize},
	* or {@code sampleSize > populationSize}.
	*/
	public HypergeometricDistribution(int populationSize,
	int numberOfSuccesses,
	int sampleSize) {
	if (populationSize <= 0) {
	throw new DistributionException(DistributionException.NOT_STRICTLY_POSITIVE,
	populationSize);
	}
	if (numberOfSuccesses < 0) {
	throw new DistributionException(DistributionException.NEGATIVE,
	numberOfSuccesses);
	}
	if (sampleSize < 0) {
	throw new DistributionException(DistributionException.NEGATIVE,
	sampleSize);
	}

	if (numberOfSuccesses > populationSize) {
	throw new DistributionException(DistributionException.TOO_LARGE,
	numberOfSuccesses, populationSize);
	}
	if (sampleSize > populationSize) {
	throw new DistributionException(DistributionException.TOO_LARGE,
	sampleSize, populationSize);
	}

	this.numberOfSuccesses = numberOfSuccesses;
	this.populationSize = populationSize;
	this.sampleSize = sampleSize;
	lowerBound = getLowerDomain(populationSize, numberOfSuccesses, sampleSize);
	upperBound = getUpperDomain(numberOfSuccesses, sampleSize);
	}

	/**
	* Return the lowest domain value for the given hypergeometric distribution
	* parameters.
	*
	* @param n Population size.
	* @param m Number of successes in the population.
	* @param k Sample size.
	* @return the lowest domain value of the hypergeometric distribution.
	*/
	private static int getLowerDomain(int n, int m, int k) {
	return Math.max(0, m - (n - k));
	}

	/**
	* Return the highest domain value for the given hypergeometric distribution
	* parameters.
	*
	* @param m Number of successes in the population.
	* @param k Sample size.
	* @return the highest domain value of the hypergeometric distribution.
	*/
	private static int getUpperDomain(int m, int k) {
	return Math.min(k, m);
	}

	/**
	* Access the number of successes.
	*
	* @return the number of successes.
	*/
	public int getNumberOfSuccesses() {
	return numberOfSuccesses;
	}

	/**
	* Access the population size.
	*
	* @return the population size.
	*/
	public int getPopulationSize() {
	return populationSize;
	}

	/**
	* Access the sample size.
	*
	* @return the sample size.
	*/
	public int getSampleSize() {
	return sampleSize;
	}

	/** {@inheritDoc} */
	@Override
	public double probability(int x) {
	return Math.exp(logProbability(x));
	}

	/** {@inheritDoc} */
	@Override
	public double logProbability(int x) {
	if (x < lowerBound \|\| x > upperBound) {
	return Double.NEGATIVE_INFINITY;
	}
	final double p = (double) sampleSize / (double) populationSize;
	final double q = (double) (populationSize - sampleSize) / (double) populationSize;
	return logProbability(x, p, q);
	}

	/**
	* Compute the log probability.
	*
	* @param x Value.
	* @param p sample size / population size.
	* @param q (population size - sample size) / population size
	* @return log(P(X = x))
	*/
	private double logProbability(int x, double p, double q) {
	final double p1 =
	SaddlePointExpansionUtils.logBinomialProbability(x, numberOfSuccesses, p, q);
	final double p2 =
	SaddlePointExpansionUtils.logBinomialProbability(sampleSize - x,
	populationSize - numberOfSuccesses, p, q);
	final double p3 =
	SaddlePointExpansionUtils.logBinomialProbability(sampleSize, populationSize, p, q);
	return p1 + p2 - p3;
	}

	/** {@inheritDoc} */
	@Override
	public double cumulativeProbability(int x) {
	if (x < lowerBound) {
	return 0.0;
	} else if (x >= upperBound) {
	return 1.0;
	}
	return innerCumulativeProbability(lowerBound, x);
	}

	/** {@inheritDoc} */
	@Override
	public double survivalProbability(int x) {
	if (x < lowerBound) {
	return 1.0;
	} else if (x >= upperBound) {
	return 0.0;
	}
	return innerCumulativeProbability(upperBound, x + 1);
	}

	/**
	* For this distribution, {@code X}, this method returns
	* {@code P(x0 <= X <= x1)}.
	* This probability is computed by summing the point probabilities for the
	* values {@code x0, x0 + dx, x0 + 2 * dx, ..., x1}; the direction {@code dx} is determined
	* using a comparison of the input bounds.
	* This should be called by using {@code x0} as the domain limit and {@code x1}
	* as the internal value. This will result in an initial sum of increasing larger magnitudes.
	*
	* @param x0 Inclusive domain bound.
	* @param x1 Inclusive internal bound.
	* @return {@code P(x0 <= X <= x1)}.
	*/
	private double innerCumulativeProbability(int x0, int x1) {
	// Assume the range is within the domain.
	// Reuse the computation for probability(x) but avoid checking the domain for each call.
	final double p = (double) sampleSize / (double) populationSize;
	final double q = (double) (populationSize - sampleSize) / (double) populationSize;
	int x = x0;
	double ret = Math.exp(logProbability(x, p, q));
	if (x0 < x1) {
	while (x != x1) {
	x++;
	ret += Math.exp(logProbability(x, p, q));
	}
	} else {
	while (x != x1) {
	x--;
	ret += Math.exp(logProbability(x, p, q));
	}
	}
	return ret;
	}

	/**
	* {@inheritDoc}
	*
	* For population size {@code N}, number of successes {@code m}, and sample
	* size {@code n}, the mean is {@code n * m / N}.
	*/
	@Override
	public double getMean() {
	return getSampleSize() * (getNumberOfSuccesses() / (double) getPopulationSize());
	}

	/**
	* {@inheritDoc}
	*
	* For population size {@code N}, number of successes {@code m}, and sample
	* size {@code n}, the variance is
	* {@code (n * m * (N - n) * (N - m)) / (N^2 * (N - 1))}.
	*/
	@Override
	public double getVariance() {
	final double N = getPopulationSize();
	final double m = getNumberOfSuccesses();
	final double n = getSampleSize();
	return (n * m * (N - n) * (N - m)) / (N * N * (N - 1));
	}

	/**
	* {@inheritDoc}
	*
	* For population size {@code N}, number of successes {@code m}, and sample
	* size {@code n}, the lower bound of the support is
	* {@code max(0, n + m - N)}.
	*
	* @return lower bound of the support
	*/
	@Override
	public int getSupportLowerBound() {
	return lowerBound;
	}

	/**
	* {@inheritDoc}
	*
	* For number of successes {@code m} and sample size {@code n}, the upper
	* bound of the support is {@code min(m, n)}.
	*
	* @return upper bound of the support
	*/
	@Override
	public int getSupportUpperBound() {
	return upperBound;
	}

	/**
	* {@inheritDoc}
	*
	* The support of this distribution is connected.
	*
	* @return {@code true}
	*/
	@Override
	public boolean isSupportConnected() {
	return true;
	}
	}