commons-math-legacy/src/main/java/org/apache/commons/math4/legacy/stat/inference/OneWayAnova.java - commons-math - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.commons.math4.legacy.stat.inference;

 import java.util.ArrayList;
 import java.util.Collection;

 import org.apache.commons.statistics.distribution.FDistribution;
 import org.apache.commons.math4.legacy.exception.ConvergenceException;
 import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
 import org.apache.commons.math4.legacy.exception.MaxCountExceededException;
 import org.apache.commons.math4.legacy.exception.NullArgumentException;
 import org.apache.commons.math4.legacy.exception.OutOfRangeException;
 import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
 import org.apache.commons.math4.legacy.stat.descriptive.SummaryStatistics;

 /**
  * Implements one-way ANOVA (analysis of variance) statistics.
  *
  * <p> Tests for differences between two or more categories of univariate data
  * (for example, the body mass index of accountants, lawyers, doctors and
  * computer programmers).  When two categories are given, this is equivalent to
  * the {@link org.apache.commons.math4.legacy.stat.inference.TTest}.
  * </p><p>
  * Uses the {@link org.apache.commons.statistics.distribution.FDistribution
  * commons-math F Distribution implementation} to estimate exact p-values.</p>
  * <p>This implementation is based on a description at
  * http://faculty.vassar.edu/lowry/ch13pt1.html</p>
  * <pre>
  * Abbreviations: bg = between groups,
  *                wg = within groups,
  *                ss = sum squared deviations
  * </pre>
  *
  * @since 1.2
  */
 public class OneWayAnova {

     /**
      * Default constructor.
      */
     public OneWayAnova() {
     }

     /**
      * Computes the ANOVA F-value for a collection of <code>double[]</code>
      * arrays.
      *
      * <p><strong>Preconditions</strong>: <ul>
      * <li>The categoryData <code>Collection</code> must contain
      * <code>double[]</code> arrays.</li>
      * <li> There must be at least two <code>double[]</code> arrays in the
      * <code>categoryData</code> collection and each of these arrays must
      * contain at least two values.</li></ul><p>
      * This implementation computes the F statistic using the definitional
      * formula<pre>
      *   F = msbg/mswg</pre>
      * where<pre>
      *  msbg = between group mean square
      *  mswg = within group mean square</pre>
      * are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html">
      * here</a>
      *
      * @param categoryData <code>Collection</code> of <code>double[]</code>
      * arrays each containing data for one category
      * @return Fvalue
      * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
      * @throws DimensionMismatchException if the length of the <code>categoryData</code>
      * array is less than 2 or a contained <code>double[]</code> array does not have
      * at least two values
      */
     public double anovaFValue(final Collection<double[]> categoryData)
         throws NullArgumentException, DimensionMismatchException {

         AnovaStats a = anovaStats(categoryData);
         return a.f;

     }

     /**
      * Computes the ANOVA P-value for a collection of <code>double[]</code>
      * arrays.
      *
      * <p><strong>Preconditions</strong>: <ul>
      * <li>The categoryData <code>Collection</code> must contain
      * <code>double[]</code> arrays.</li>
      * <li> There must be at least two <code>double[]</code> arrays in the
      * <code>categoryData</code> collection and each of these arrays must
      * contain at least two values.</li></ul><p>
      * This implementation uses the
      * {@link org.apache.commons.statistics.distribution.FDistribution
      * commons-math F Distribution implementation} to estimate the exact
      * p-value, using the formula<pre>
      *   p = 1 - cumulativeProbability(F)</pre>
      * where <code>F</code> is the F value and <code>cumulativeProbability</code>
      * is the commons-math implementation of the F distribution.
      *
      * @param categoryData <code>Collection</code> of <code>double[]</code>
      * arrays each containing data for one category
      * @return Pvalue
      * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
      * @throws DimensionMismatchException if the length of the <code>categoryData</code>
      * array is less than 2 or a contained <code>double[]</code> array does not have
      * at least two values
      * @throws ConvergenceException if the p-value can not be computed due to a convergence error
      * @throws MaxCountExceededException if the maximum number of iterations is exceeded
      */
     public double anovaPValue(final Collection<double[]> categoryData)
         throws NullArgumentException, DimensionMismatchException,
         ConvergenceException, MaxCountExceededException {

         final AnovaStats a = anovaStats(categoryData);
         // No try-catch or advertised exception because args are valid
         // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
         final FDistribution fdist = FDistribution.of(a.dfbg, a.dfwg);
         return 1.0 - fdist.cumulativeProbability(a.f);

     }

     /**
      * Computes the ANOVA P-value for a collection of {@link SummaryStatistics}.
      *
      * <p><strong>Preconditions</strong>: <ul>
      * <li>The categoryData <code>Collection</code> must contain
      * {@link SummaryStatistics}.</li>
      * <li> There must be at least two {@link SummaryStatistics} in the
      * <code>categoryData</code> collection and each of these statistics must
      * contain at least two values.</li></ul><p>
      * This implementation uses the
      * {@link org.apache.commons.statistics.distribution.FDistribution
      * commons-math F Distribution implementation} to estimate the exact
      * p-value, using the formula<pre>
      *   p = 1 - cumulativeProbability(F)</pre>
      * where <code>F</code> is the F value and <code>cumulativeProbability</code>
      * is the commons-math implementation of the F distribution.
      *
      * @param categoryData <code>Collection</code> of {@link SummaryStatistics}
      * each containing data for one category
      * @param allowOneElementData if true, allow computation for one catagory
      * only or for one data element per category
      * @return Pvalue
      * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
      * @throws DimensionMismatchException if the length of the <code>categoryData</code>
      * array is less than 2 or a contained {@link SummaryStatistics} does not have
      * at least two values
      * @throws ConvergenceException if the p-value can not be computed due to a convergence error
      * @throws MaxCountExceededException if the maximum number of iterations is exceeded
      * @since 3.2
      */
     public double anovaPValue(final Collection<SummaryStatistics> categoryData,
                               final boolean allowOneElementData)
         throws NullArgumentException, DimensionMismatchException,
                ConvergenceException, MaxCountExceededException {

         final AnovaStats a = anovaStats(categoryData, allowOneElementData);
         // pass a null rng to avoid unneeded overhead as we will not sample from this distribution
         final FDistribution fdist = FDistribution.of(a.dfbg, a.dfwg);
         return 1.0 - fdist.cumulativeProbability(a.f);

     }

     /**
      * This method calls the method that actually does the calculations (except
      * P-value).
      *
      * @param categoryData
      *            <code>Collection</code> of <code>double[]</code> arrays each
      *            containing data for one category
      * @return computed AnovaStats
      * @throws NullArgumentException
      *             if <code>categoryData</code> is <code>null</code>
      * @throws DimensionMismatchException
      *             if the length of the <code>categoryData</code> array is less
      *             than 2 or a contained <code>double[]</code> array does not
      *             contain at least two values
      */
     private AnovaStats anovaStats(final Collection<double[]> categoryData)
         throws NullArgumentException, DimensionMismatchException {

         NullArgumentException.check(categoryData);

         final Collection<SummaryStatistics> categoryDataSummaryStatistics =
                 new ArrayList<>(categoryData.size());

         // convert arrays to SummaryStatistics
         for (final double[] data : categoryData) {
             final SummaryStatistics dataSummaryStatistics = new SummaryStatistics();
             categoryDataSummaryStatistics.add(dataSummaryStatistics);
             for (final double val : data) {
                 dataSummaryStatistics.addValue(val);
             }
         }

         return anovaStats(categoryDataSummaryStatistics, false);

     }

     /**
      * Performs an ANOVA test, evaluating the null hypothesis that there
      * is no difference among the means of the data categories.
      *
      * <p><strong>Preconditions</strong>: <ul>
      * <li>The categoryData <code>Collection</code> must contain
      * <code>double[]</code> arrays.</li>
      * <li> There must be at least two <code>double[]</code> arrays in the
      * <code>categoryData</code> collection and each of these arrays must
      * contain at least two values.</li>
      * <li>alpha must be strictly greater than 0 and less than or equal to 0.5.
      * </li></ul><p>
      * This implementation uses the
      * {@link org.apache.commons.statistics.distribution.FDistribution
      * commons-math F Distribution implementation} to estimate the exact
      * p-value, using the formula<pre>
      *   p = 1 - cumulativeProbability(F)</pre>
      * where <code>F</code> is the F value and <code>cumulativeProbability</code>
      * is the commons-math implementation of the F distribution.
      * <p>True is returned iff the estimated p-value is less than alpha.</p>
      *
      * @param categoryData <code>Collection</code> of <code>double[]</code>
      * arrays each containing data for one category
      * @param alpha significance level of the test
      * @return true if the null hypothesis can be rejected with
      * confidence 1 - alpha
      * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
      * @throws DimensionMismatchException if the length of the <code>categoryData</code>
      * array is less than 2 or a contained <code>double[]</code> array does not have
      * at least two values
      * @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
      * @throws ConvergenceException if the p-value can not be computed due to a convergence error
      * @throws MaxCountExceededException if the maximum number of iterations is exceeded
      */
     public boolean anovaTest(final Collection<double[]> categoryData,
                              final double alpha)
         throws NullArgumentException, DimensionMismatchException,
         OutOfRangeException, ConvergenceException, MaxCountExceededException {

         if (alpha <= 0 || alpha > 0.5) {
             throw new OutOfRangeException(
                     LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
                     alpha, 0, 0.5);
         }
         return anovaPValue(categoryData) < alpha;

     }

     /**
      * This method actually does the calculations (except P-value).
      *
      * @param categoryData <code>Collection</code> of <code>double[]</code>
      * arrays each containing data for one category
      * @param allowOneElementData if true, allow computation for one catagory
      * only or for one data element per category
      * @return computed AnovaStats
      * @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
      * @throws DimensionMismatchException if <code>allowOneElementData</code> is false and the number of
      * categories is less than 2 or a contained SummaryStatistics does not contain
      * at least two values
      */
     private AnovaStats anovaStats(final Collection<SummaryStatistics> categoryData,
                                   final boolean allowOneElementData)
         throws NullArgumentException, DimensionMismatchException {

         NullArgumentException.check(categoryData);

         if (!allowOneElementData) {
             // check if we have enough categories
             if (categoryData.size() < 2) {
                 throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED,
                                                      categoryData.size(), 2);
             }

             // check if each category has enough data
             for (final SummaryStatistics array : categoryData) {
                 if (array.getN() <= 1) {
                     throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED,
                                                          (int) array.getN(), 2);
                 }
             }
         }

         int dfwg = 0;
         double sswg = 0;
         double totsum = 0;
         double totsumsq = 0;
         int totnum = 0;

         for (final SummaryStatistics data : categoryData) {

             final double sum = data.getSum();
             final double sumsq = data.getSumsq();
             final int num = (int) data.getN();
             totnum += num;
             totsum += sum;
             totsumsq += sumsq;

             dfwg += num - 1;
             final double ss = sumsq - ((sum * sum) / num);
             sswg += ss;
         }

         final double sst = totsumsq - ((totsum * totsum) / totnum);
         final double ssbg = sst - sswg;
         final int dfbg = categoryData.size() - 1;
         final double msbg = ssbg / dfbg;
         final double mswg = sswg / dfwg;
         final double f = msbg / mswg;

         return new AnovaStats(dfbg, dfwg, f);

     }

     /**
         Convenience class to pass dfbg,dfwg,F values around within OneWayAnova.
         No get/set methods provided.
     */
     private static final class AnovaStats {

         /** Degrees of freedom in numerator (between groups). */
         private final int dfbg;

         /** Degrees of freedom in denominator (within groups). */
         private final int dfwg;

         /** Statistic. */
         private final double f;

         /**
          * Constructor.
          * @param dfbg degrees of freedom in numerator (between groups)
          * @param dfwg degrees of freedom in denominator (within groups)
          * @param f statistic
          */
         private AnovaStats(int dfbg, int dfwg, double f) {
             this.dfbg = dfbg;
             this.dfwg = dfwg;
             this.f = f;
         }
     }

 }
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
	package org.apache.commons.math4.legacy.stat.inference;

	import java.util.ArrayList;
	import java.util.Collection;

	import org.apache.commons.statistics.distribution.FDistribution;
	import org.apache.commons.math4.legacy.exception.ConvergenceException;
	import org.apache.commons.math4.legacy.exception.DimensionMismatchException;
	import org.apache.commons.math4.legacy.exception.MaxCountExceededException;
	import org.apache.commons.math4.legacy.exception.NullArgumentException;
	import org.apache.commons.math4.legacy.exception.OutOfRangeException;
	import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
	import org.apache.commons.math4.legacy.stat.descriptive.SummaryStatistics;

	/**
	* Implements one-way ANOVA (analysis of variance) statistics.
	*
	* <p> Tests for differences between two or more categories of univariate data
	* (for example, the body mass index of accountants, lawyers, doctors and
	* computer programmers). When two categories are given, this is equivalent to
	* the {@link org.apache.commons.math4.legacy.stat.inference.TTest}.
	* </p><p>
	* Uses the {@link org.apache.commons.statistics.distribution.FDistribution
	* commons-math F Distribution implementation} to estimate exact p-values.</p>
	* <p>This implementation is based on a description at
	* http://faculty.vassar.edu/lowry/ch13pt1.html</p>
	* <pre>
	* Abbreviations: bg = between groups,
	* wg = within groups,
	* ss = sum squared deviations
	* </pre>
	*
	* @since 1.2
	*/
	public class OneWayAnova {

	/**
	* Default constructor.
	*/
	public OneWayAnova() {
	}

	/**
	* Computes the ANOVA F-value for a collection of <code>double[]</code>
	* arrays.
	*
	* <p><strong>Preconditions</strong>: <ul>
	* <li>The categoryData <code>Collection</code> must contain
	* <code>double[]</code> arrays.</li>
	* <li> There must be at least two <code>double[]</code> arrays in the
	* <code>categoryData</code> collection and each of these arrays must
	* contain at least two values.</li></ul><p>
	* This implementation computes the F statistic using the definitional
	* formula<pre>
	* F = msbg/mswg</pre>
	* where<pre>
	* msbg = between group mean square
	* mswg = within group mean square</pre>
	* are as defined <a href="http://faculty.vassar.edu/lowry/ch13pt1.html">
	* here</a>
	*
	* @param categoryData <code>Collection</code> of <code>double[]</code>
	* arrays each containing data for one category
	* @return Fvalue
	* @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
	* @throws DimensionMismatchException if the length of the <code>categoryData</code>
	* array is less than 2 or a contained <code>double[]</code> array does not have
	* at least two values
	*/
	public double anovaFValue(final Collection<double[]> categoryData)
	throws NullArgumentException, DimensionMismatchException {

	AnovaStats a = anovaStats(categoryData);
	return a.f;

	}

	/**
	* Computes the ANOVA P-value for a collection of <code>double[]</code>
	* arrays.
	*
	* <p><strong>Preconditions</strong>: <ul>
	* <li>The categoryData <code>Collection</code> must contain
	* <code>double[]</code> arrays.</li>
	* <li> There must be at least two <code>double[]</code> arrays in the
	* <code>categoryData</code> collection and each of these arrays must
	* contain at least two values.</li></ul><p>
	* This implementation uses the
	* {@link org.apache.commons.statistics.distribution.FDistribution
	* commons-math F Distribution implementation} to estimate the exact
	* p-value, using the formula<pre>
	* p = 1 - cumulativeProbability(F)</pre>
	* where <code>F</code> is the F value and <code>cumulativeProbability</code>
	* is the commons-math implementation of the F distribution.
	*
	* @param categoryData <code>Collection</code> of <code>double[]</code>
	* arrays each containing data for one category
	* @return Pvalue
	* @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
	* @throws DimensionMismatchException if the length of the <code>categoryData</code>
	* array is less than 2 or a contained <code>double[]</code> array does not have
	* at least two values
	* @throws ConvergenceException if the p-value can not be computed due to a convergence error
	* @throws MaxCountExceededException if the maximum number of iterations is exceeded
	*/
	public double anovaPValue(final Collection<double[]> categoryData)
	throws NullArgumentException, DimensionMismatchException,
	ConvergenceException, MaxCountExceededException {

	final AnovaStats a = anovaStats(categoryData);
	// No try-catch or advertised exception because args are valid
	// pass a null rng to avoid unneeded overhead as we will not sample from this distribution
	final FDistribution fdist = FDistribution.of(a.dfbg, a.dfwg);
	return 1.0 - fdist.cumulativeProbability(a.f);

	}

	/**
	* Computes the ANOVA P-value for a collection of {@link SummaryStatistics}.
	*
	* <p><strong>Preconditions</strong>: <ul>
	* <li>The categoryData <code>Collection</code> must contain
	* {@link SummaryStatistics}.</li>
	* <li> There must be at least two {@link SummaryStatistics} in the
	* <code>categoryData</code> collection and each of these statistics must
	* contain at least two values.</li></ul><p>
	* This implementation uses the
	* {@link org.apache.commons.statistics.distribution.FDistribution
	* commons-math F Distribution implementation} to estimate the exact
	* p-value, using the formula<pre>
	* p = 1 - cumulativeProbability(F)</pre>
	* where <code>F</code> is the F value and <code>cumulativeProbability</code>
	* is the commons-math implementation of the F distribution.
	*
	* @param categoryData <code>Collection</code> of {@link SummaryStatistics}
	* each containing data for one category
	* @param allowOneElementData if true, allow computation for one catagory
	* only or for one data element per category
	* @return Pvalue
	* @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
	* @throws DimensionMismatchException if the length of the <code>categoryData</code>
	* array is less than 2 or a contained {@link SummaryStatistics} does not have
	* at least two values
	* @throws ConvergenceException if the p-value can not be computed due to a convergence error
	* @throws MaxCountExceededException if the maximum number of iterations is exceeded
	* @since 3.2
	*/
	public double anovaPValue(final Collection<SummaryStatistics> categoryData,
	final boolean allowOneElementData)
	throws NullArgumentException, DimensionMismatchException,
	ConvergenceException, MaxCountExceededException {

	final AnovaStats a = anovaStats(categoryData, allowOneElementData);
	// pass a null rng to avoid unneeded overhead as we will not sample from this distribution
	final FDistribution fdist = FDistribution.of(a.dfbg, a.dfwg);
	return 1.0 - fdist.cumulativeProbability(a.f);

	}

	/**
	* This method calls the method that actually does the calculations (except
	* P-value).
	*
	* @param categoryData
	* <code>Collection</code> of <code>double[]</code> arrays each
	* containing data for one category
	* @return computed AnovaStats
	* @throws NullArgumentException
	* if <code>categoryData</code> is <code>null</code>
	* @throws DimensionMismatchException
	* if the length of the <code>categoryData</code> array is less
	* than 2 or a contained <code>double[]</code> array does not
	* contain at least two values
	*/
	private AnovaStats anovaStats(final Collection<double[]> categoryData)
	throws NullArgumentException, DimensionMismatchException {

	NullArgumentException.check(categoryData);

	final Collection<SummaryStatistics> categoryDataSummaryStatistics =
	new ArrayList<>(categoryData.size());

	// convert arrays to SummaryStatistics
	for (final double[] data : categoryData) {
	final SummaryStatistics dataSummaryStatistics = new SummaryStatistics();
	categoryDataSummaryStatistics.add(dataSummaryStatistics);
	for (final double val : data) {
	dataSummaryStatistics.addValue(val);
	}
	}

	return anovaStats(categoryDataSummaryStatistics, false);

	}

	/**
	* Performs an ANOVA test, evaluating the null hypothesis that there
	* is no difference among the means of the data categories.
	*
	* <p><strong>Preconditions</strong>: <ul>
	* <li>The categoryData <code>Collection</code> must contain
	* <code>double[]</code> arrays.</li>
	* <li> There must be at least two <code>double[]</code> arrays in the
	* <code>categoryData</code> collection and each of these arrays must
	* contain at least two values.</li>
	* <li>alpha must be strictly greater than 0 and less than or equal to 0.5.
	* </li></ul><p>
	* This implementation uses the
	* {@link org.apache.commons.statistics.distribution.FDistribution
	* commons-math F Distribution implementation} to estimate the exact
	* p-value, using the formula<pre>
	* p = 1 - cumulativeProbability(F)</pre>
	* where <code>F</code> is the F value and <code>cumulativeProbability</code>
	* is the commons-math implementation of the F distribution.
	* <p>True is returned iff the estimated p-value is less than alpha.</p>
	*
	* @param categoryData <code>Collection</code> of <code>double[]</code>
	* arrays each containing data for one category
	* @param alpha significance level of the test
	* @return true if the null hypothesis can be rejected with
	* confidence 1 - alpha
	* @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
	* @throws DimensionMismatchException if the length of the <code>categoryData</code>
	* array is less than 2 or a contained <code>double[]</code> array does not have
	* at least two values
	* @throws OutOfRangeException if <code>alpha</code> is not in the range (0, 0.5]
	* @throws ConvergenceException if the p-value can not be computed due to a convergence error
	* @throws MaxCountExceededException if the maximum number of iterations is exceeded
	*/
	public boolean anovaTest(final Collection<double[]> categoryData,
	final double alpha)
	throws NullArgumentException, DimensionMismatchException,
	OutOfRangeException, ConvergenceException, MaxCountExceededException {

	if (alpha <= 0 \|\| alpha > 0.5) {
	throw new OutOfRangeException(
	LocalizedFormats.OUT_OF_BOUND_SIGNIFICANCE_LEVEL,
	alpha, 0, 0.5);
	}
	return anovaPValue(categoryData) < alpha;

	}

	/**
	* This method actually does the calculations (except P-value).
	*
	* @param categoryData <code>Collection</code> of <code>double[]</code>
	* arrays each containing data for one category
	* @param allowOneElementData if true, allow computation for one catagory
	* only or for one data element per category
	* @return computed AnovaStats
	* @throws NullArgumentException if <code>categoryData</code> is <code>null</code>
	* @throws DimensionMismatchException if <code>allowOneElementData</code> is false and the number of
	* categories is less than 2 or a contained SummaryStatistics does not contain
	* at least two values
	*/
	private AnovaStats anovaStats(final Collection<SummaryStatistics> categoryData,
	final boolean allowOneElementData)
	throws NullArgumentException, DimensionMismatchException {

	NullArgumentException.check(categoryData);

	if (!allowOneElementData) {
	// check if we have enough categories
	if (categoryData.size() < 2) {
	throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_CATEGORIES_REQUIRED,
	categoryData.size(), 2);
	}

	// check if each category has enough data
	for (final SummaryStatistics array : categoryData) {
	if (array.getN() <= 1) {
	throw new DimensionMismatchException(LocalizedFormats.TWO_OR_MORE_VALUES_IN_CATEGORY_REQUIRED,
	(int) array.getN(), 2);
	}
	}
	}

	int dfwg = 0;
	double sswg = 0;
	double totsum = 0;
	double totsumsq = 0;
	int totnum = 0;

	for (final SummaryStatistics data : categoryData) {

	final double sum = data.getSum();
	final double sumsq = data.getSumsq();
	final int num = (int) data.getN();
	totnum += num;
	totsum += sum;
	totsumsq += sumsq;

	dfwg += num - 1;
	final double ss = sumsq - ((sum * sum) / num);
	sswg += ss;
	}

	final double sst = totsumsq - ((totsum * totsum) / totnum);
	final double ssbg = sst - sswg;
	final int dfbg = categoryData.size() - 1;
	final double msbg = ssbg / dfbg;
	final double mswg = sswg / dfwg;
	final double f = msbg / mswg;

	return new AnovaStats(dfbg, dfwg, f);

	}

	/**
	Convenience class to pass dfbg,dfwg,F values around within OneWayAnova.
	No get/set methods provided.
	*/
	private static final class AnovaStats {

	/** Degrees of freedom in numerator (between groups). */
	private final int dfbg;

	/** Degrees of freedom in denominator (within groups). */
	private final int dfwg;

	/** Statistic. */
	private final double f;

	/**
	* Constructor.
	* @param dfbg degrees of freedom in numerator (between groups)
	* @param dfwg degrees of freedom in denominator (within groups)
	* @param f statistic
	*/
	private AnovaStats(int dfbg, int dfwg, double f) {
	this.dfbg = dfbg;
	this.dfwg = dfwg;
	this.f = f;
	}
	}

	}