commons-math-legacy/src/main/java/org/apache/commons/math4/legacy/stat/regression/SimpleRegression.java - commons-math - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package org.apache.commons.math4.legacy.stat.regression;
 import java.io.Serializable;

 import org.apache.commons.statistics.distribution.TDistribution;
 import org.apache.commons.math4.legacy.exception.MathIllegalArgumentException;
 import org.apache.commons.math4.legacy.exception.NoDataException;
 import org.apache.commons.math4.legacy.exception.OutOfRangeException;
 import org.apache.commons.math4.legacy.exception.util.LocalizedFormats;
 import org.apache.commons.math4.legacy.core.jdkmath.AccurateMath;
 import org.apache.commons.numbers.core.Precision;

 /**
  * Estimates an ordinary least squares regression model
  * with one independent variable.
  * <p>
  * <code> y = intercept + slope * x  </code></p>
  * <p>
  * Standard errors for <code>intercept</code> and <code>slope</code> are
  * available as well as ANOVA, r-square and Pearson's r statistics.</p>
  * <p>
  * Observations (x,y pairs) can be added to the model one at a time or they
  * can be provided in a 2-dimensional array.  The observations are not stored
  * in memory, so there is no limit to the number of observations that can be
  * added to the model.</p>
  * <p>
  * <strong>Usage Notes</strong>: <ul>
  * <li> When there are fewer than two observations in the model, or when
  * there is no variation in the x values (i.e. all x values are the same)
  * all statistics return <code>NaN</code>. At least two observations with
  * different x coordinates are required to estimate a bivariate regression
  * model.
  * </li>
  * <li> Getters for the statistics always compute values based on the current
  * set of observations -- i.e., you can get statistics, then add more data
  * and get updated statistics without using a new instance.  There is no
  * "compute" method that updates all statistics.  Each of the getters performs
  * the necessary computations to return the requested statistic.
  * </li>
  * <li> The intercept term may be suppressed by passing {@code false} to
  * the {@link #SimpleRegression(boolean)} constructor.  When the
  * {@code hasIntercept} property is false, the model is estimated without a
  * constant term and {@link #getIntercept()} returns {@code 0}.</li>
  * </ul>
  *
  */
 public class SimpleRegression implements Serializable, UpdatingMultipleLinearRegression {

     /** Serializable version identifier. */
     private static final long serialVersionUID = -3004689053607543335L;

     /** sum of x values. */
     private double sumX;

     /** total variation in x (sum of squared deviations from xbar). */
     private double sumXX;

     /** sum of y values. */
     private double sumY;

     /** total variation in y (sum of squared deviations from ybar). */
     private double sumYY;

     /** sum of products. */
     private double sumXY;

     /** number of observations. */
     private long n;

     /** mean of accumulated x values, used in updating formulas. */
     private double xbar;

     /** mean of accumulated y values, used in updating formulas. */
     private double ybar;

     /** include an intercept or not. */
     private final boolean hasIntercept;
     // ---------------------Public methods--------------------------------------

     /**
      * Create an empty SimpleRegression instance.
      */
     public SimpleRegression() {
         this(true);
     }
     /**
     * Create a SimpleRegression instance, specifying whether or not to estimate
     * an intercept.
     *
     * <p>Use {@code false} to estimate a model with no intercept.  When the
     * {@code hasIntercept} property is false, the model is estimated without a
     * constant term and {@link #getIntercept()} returns {@code 0}.</p>
     *
     * @param includeIntercept whether or not to include an intercept term in
     * the regression model
     */
     public SimpleRegression(boolean includeIntercept) {
         super();
         hasIntercept = includeIntercept;
     }

     /**
      * Adds the observation (x,y) to the regression data set.
      * <p>
      * Uses updating formulas for means and sums of squares defined in
      * "Algorithms for Computing the Sample Variance: Analysis and
      * Recommendations", Chan, T.F., Golub, G.H., and LeVeque, R.J.
      * 1983, American Statistician, vol. 37, pp. 242-247, referenced in
      * Weisberg, S. "Applied Linear Regression". 2nd Ed. 1985.</p>
      *
      *
      * @param x independent variable value
      * @param y dependent variable value
      */
     public void addData(final double x,final double y) {
         if (n == 0) {
             xbar = x;
             ybar = y;
         } else {
             if( hasIntercept ){
                 final double fact1 = 1.0 + n;
                 final double fact2 = n / (1.0 + n);
                 final double dx = x - xbar;
                 final double dy = y - ybar;
                 sumXX += dx * dx * fact2;
                 sumYY += dy * dy * fact2;
                 sumXY += dx * dy * fact2;
                 xbar += dx / fact1;
                 ybar += dy / fact1;
             }
          }
         if( !hasIntercept ){
             sumXX += x * x ;
             sumYY += y * y ;
             sumXY += x * y ;
         }
         sumX += x;
         sumY += y;
         n++;
     }

     /**
      * Appends data from another regression calculation to this one.
      *
      * <p>The mean update formulae are based on a paper written by Philippe
      * P&eacute;bay:
      * <a
      * href="http://prod.sandia.gov/techlib/access-control.cgi/2008/086212.pdf">
      * Formulas for Robust, One-Pass Parallel Computation of Covariances and
      * Arbitrary-Order Statistical Moments</a>, 2008, Technical Report
      * SAND2008-6212, Sandia National Laboratories.</p>
      *
      * @param reg model to append data from
      * @since 3.3
      */
     public void append(SimpleRegression reg) {
         if (n == 0) {
             xbar = reg.xbar;
             ybar = reg.ybar;
             sumXX = reg.sumXX;
             sumYY = reg.sumYY;
             sumXY = reg.sumXY;
         } else {
             if (hasIntercept) {
                 final double fact1 = reg.n / (double) (reg.n + n);
                 final double fact2 = n * reg.n / (double) (reg.n + n);
                 final double dx = reg.xbar - xbar;
                 final double dy = reg.ybar - ybar;
                 sumXX += reg.sumXX + dx * dx * fact2;
                 sumYY += reg.sumYY + dy * dy * fact2;
                 sumXY += reg.sumXY + dx * dy * fact2;
                 xbar += dx * fact1;
                 ybar += dy * fact1;
             }else{
                 sumXX += reg.sumXX;
                 sumYY += reg.sumYY;
                 sumXY += reg.sumXY;
             }
         }
         sumX += reg.sumX;
         sumY += reg.sumY;
         n += reg.n;
     }

     /**
      * Removes the observation (x,y) from the regression data set.
      * <p>
      * Mirrors the addData method.  This method permits the use of
      * SimpleRegression instances in streaming mode where the regression
      * is applied to a sliding "window" of observations, however the caller is
      * responsible for maintaining the set of observations in the window.</p>
      *
      * The method has no effect if there are no points of data (i.e. n=0)
      *
      * @param x independent variable value
      * @param y dependent variable value
      */
     public void removeData(final double x,final double y) {
         if (n > 0) {
             if (hasIntercept) {
                 final double fact1 = n - 1.0;
                 final double fact2 = n / (n - 1.0);
                 final double dx = x - xbar;
                 final double dy = y - ybar;
                 sumXX -= dx * dx * fact2;
                 sumYY -= dy * dy * fact2;
                 sumXY -= dx * dy * fact2;
                 xbar -= dx / fact1;
                 ybar -= dy / fact1;
             } else {
                 final double fact1 = n - 1.0;
                 sumXX -= x * x;
                 sumYY -= y * y;
                 sumXY -= x * y;
                 xbar -= x / fact1;
                 ybar -= y / fact1;
             }
              sumX -= x;
              sumY -= y;
              n--;
         }
     }

     /**
      * Adds the observations represented by the elements in
      * <code>data</code>.
      * <p>
      * <code>(data[0][0],data[0][1])</code> will be the first observation, then
      * <code>(data[1][0],data[1][1])</code>, etc.</p>
      * <p>
      * This method does not replace data that has already been added.  The
      * observations represented by <code>data</code> are added to the existing
      * dataset.</p>
      * <p>
      * To replace all data, use <code>clear()</code> before adding the new
      * data.</p>
      *
      * @param data array of observations to be added
      * @throws ModelSpecificationException if the length of {@code data[i]} is not
      * greater than or equal to 2
      */
     public void addData(final double[][] data) throws ModelSpecificationException {
         for (int i = 0; i < data.length; i++) {
             if( data[i].length < 2 ){
                throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,
                     data[i].length, 2);
             }
             addData(data[i][0], data[i][1]);
         }
     }

     /**
      * Adds one observation to the regression model.
      *
      * @param x the independent variables which form the design matrix
      * @param y the dependent or response variable
      * @throws ModelSpecificationException if the length of {@code x} does not equal
      * the number of independent variables in the model
      */
     @Override
     public void addObservation(final double[] x,final double y)
             throws ModelSpecificationException {
         if( x == null || x.length == 0 ){
             throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION,x!=null?x.length:0, 1);
         }
         addData( x[0], y );
     }

     /**
      * Adds a series of observations to the regression model. The lengths of
      * x and y must be the same and x must be rectangular.
      *
      * @param x a series of observations on the independent variables
      * @param y a series of observations on the dependent variable
      * The length of x and y must be the same
      * @throws ModelSpecificationException if {@code x} is not rectangular, does not match
      * the length of {@code y} or does not contain sufficient data to estimate the model
      */
     @Override
     public void addObservations(final double[][] x,final double[] y) throws ModelSpecificationException {
         if ((x == null) || (y == null) || (x.length != y.length)) {
             throw new ModelSpecificationException(
                   LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE,
                   (x == null) ? 0 : x.length,
                   (y == null) ? 0 : y.length);
         }
         boolean obsOk = true;
         for( int i = 0 ; i < x.length; i++){
             if( x[i] == null || x[i].length == 0 ){
                 obsOk = false;
                 break;
             }
         }
         if( !obsOk ){
             throw new ModelSpecificationException(
                   LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS,
                   0, 1);
         }
         for( int i = 0 ; i < x.length ; i++){
             addData( x[i][0], y[i] );
         }
     }

     /**
      * Removes observations represented by the elements in <code>data</code>.
       * <p>
      * If the array is larger than the current n, only the first n elements are
      * processed.  This method permits the use of SimpleRegression instances in
      * streaming mode where the regression is applied to a sliding "window" of
      * observations, however the caller is responsible for maintaining the set
      * of observations in the window.</p>
      * <p>
      * To remove all data, use <code>clear()</code>.</p>
      *
      * @param data array of observations to be removed
      */
     public void removeData(double[][] data) {
         for (int i = 0; i < data.length && n > 0; i++) {
             removeData(data[i][0], data[i][1]);
         }
     }

     /**
      * Clears all data from the model.
      */
     @Override
     public void clear() {
         sumX = 0d;
         sumXX = 0d;
         sumY = 0d;
         sumYY = 0d;
         sumXY = 0d;
         n = 0;
     }

     /**
      * Returns the number of observations that have been added to the model.
      *
      * @return n number of observations that have been added.
      */
     @Override
     public long getN() {
         return n;
     }

     /**
      * Returns the "predicted" <code>y</code> value associated with the
      * supplied <code>x</code> value,  based on the data that has been
      * added to the model when this method is activated.
      * <p>
      * <code> predict(x) = intercept + slope * x </code></p>
      * <p>
      * <strong>Preconditions</strong>: <ul>
      * <li>At least two observations (with at least two different x values)
      * must have been added before invoking this method. If this method is
      * invoked before a model can be estimated, <code>Double,NaN</code> is
      * returned.
      * </li></ul>
      *
      * @param x input <code>x</code> value
      * @return predicted <code>y</code> value
      */
     public double predict(final double x) {
         final double b1 = getSlope();
         if (hasIntercept) {
             return getIntercept(b1) + b1 * x;
         }
         return b1 * x;
     }

     /**
      * Returns the intercept of the estimated regression line, if
      * {@link #hasIntercept()} is true; otherwise 0.
      * <p>
      * The least squares estimate of the intercept is computed using the
      * <a href="http://www.xycoon.com/estimation4.htm">normal equations</a>.
      * The intercept is sometimes denoted b0.</p>
      * <p>
      * <strong>Preconditions</strong>: <ul>
      * <li>At least two observations (with at least two different x values)
      * must have been added before invoking this method. If this method is
      * invoked before a model can be estimated, <code>Double,NaN</code> is
      * returned.
      * </li></ul>
      *
      * @return the intercept of the regression line if the model includes an
      * intercept; 0 otherwise
      * @see #SimpleRegression(boolean)
      */
     public double getIntercept() {
         return hasIntercept ? getIntercept(getSlope()) : 0.0;
     }

     /**
      * Returns true if the model includes an intercept term.
      *
      * @return true if the regression includes an intercept; false otherwise
      * @see #SimpleRegression(boolean)
      */
     @Override
     public boolean hasIntercept() {
         return hasIntercept;
     }

     /**
     * Returns the slope of the estimated regression line.
     * <p>
     * The least squares estimate of the slope is computed using the
     * <a href="http://www.xycoon.com/estimation4.htm">normal equations</a>.
     * The slope is sometimes denoted b1.</p>
     * <p>
     * <strong>Preconditions</strong>: <ul>
     * <li>At least two observations (with at least two different x values)
     * must have been added before invoking this method. If this method is
     * invoked before a model can be estimated, <code>Double.NaN</code> is
     * returned.
     * </li></ul>
     *
     * @return the slope of the regression line
     */
     public double getSlope() {
         if (n < 2) {
             return Double.NaN; //not enough data
         }
         if (AccurateMath.abs(sumXX) < 10 * Double.MIN_VALUE) {
             return Double.NaN; //not enough variation in x
         }
         return sumXY / sumXX;
     }

     /**
      * Returns the <a href="http://www.xycoon.com/SumOfSquares.htm">
      * sum of squared errors</a> (SSE) associated with the regression
      * model.
      * <p>
      * The sum is computed using the computational formula</p>
      * <p>
      * <code>SSE = SYY - (SXY * SXY / SXX)</code></p>
      * <p>
      * where <code>SYY</code> is the sum of the squared deviations of the y
      * values about their mean, <code>SXX</code> is similarly defined and
      * <code>SXY</code> is the sum of the products of x and y mean deviations.
      * </p><p>
      * The sums are accumulated using the updating algorithm referenced in
      * {@link #addData}.</p>
      * <p>
      * The return value is constrained to be non-negative - i.e., if due to
      * rounding errors the computational formula returns a negative result,
      * 0 is returned.</p>
      * <p>
      * <strong>Preconditions</strong>: <ul>
      * <li>At least two observations (with at least two different x values)
      * must have been added before invoking this method. If this method is
      * invoked before a model can be estimated, <code>Double,NaN</code> is
      * returned.
      * </li></ul>
      *
      * @return sum of squared errors associated with the regression model
      */
     public double getSumSquaredErrors() {
         return AccurateMath.max(0d, sumYY - sumXY * sumXY / sumXX);
     }

     /**
      * Returns the sum of squared deviations of the y values about their mean.
      * <p>
      * This is defined as SSTO
      * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a>.</p>
      * <p>
      * If {@code n < 2}, this returns <code>Double.NaN</code>.</p>
      *
      * @return sum of squared deviations of y values
      */
     public double getTotalSumSquares() {
         if (n < 2) {
             return Double.NaN;
         }
         return sumYY;
     }

     /**
      * Returns the sum of squared deviations of the x values about their mean.
      *
      * If <code>n &lt; 2</code>, this returns <code>Double.NaN</code>.
      *
      * @return sum of squared deviations of x values
      */
     public double getXSumSquares() {
         if (n < 2) {
             return Double.NaN;
         }
         return sumXX;
     }

     /**
      * Returns the sum of crossproducts, x<sub>i</sub>*y<sub>i</sub>.
      *
      * @return sum of cross products
      */
     public double getSumOfCrossProducts() {
         return sumXY;
     }

     /**
      * Returns the sum of squared deviations of the predicted y values about
      * their mean (which equals the mean of y).
      * <p>
      * This is usually abbreviated SSR or SSM.  It is defined as SSM
      * <a href="http://www.xycoon.com/SumOfSquares.htm">here</a></p>
      * <p>
      * <strong>Preconditions</strong>: <ul>
      * <li>At least two observations (with at least two different x values)
      * must have been added before invoking this method. If this method is
      * invoked before a model can be estimated, <code>Double.NaN</code> is
      * returned.
      * </li></ul>
      *
      * @return sum of squared deviations of predicted y values
      */
     public double getRegressionSumSquares() {
         return getRegressionSumSquares(getSlope());
     }

     /**
      * Returns the sum of squared errors divided by the degrees of freedom,
      * usually abbreviated MSE.
      * <p>
      * If there are fewer than <strong>three</strong> data pairs in the model,
      * or if there is no variation in <code>x</code>, this returns
      * <code>Double.NaN</code>.</p>
      *
      * @return sum of squared deviations of y values
      */
     public double getMeanSquareError() {
         if (n < 3) {
             return Double.NaN;
         }
         return hasIntercept ? (getSumSquaredErrors() / (n - 2)) : (getSumSquaredErrors() / (n - 1));
     }

     /**
      * Returns <a href="http://mathworld.wolfram.com/CorrelationCoefficient.html">
      * Pearson's product moment correlation coefficient</a>,
      * usually denoted r.
      * <p>
      * <strong>Preconditions</strong>: <ul>
      * <li>At least two observations (with at least two different x values)
      * must have been added before invoking this method. If this method is
      * invoked before a model can be estimated, <code>Double,NaN</code> is
      * returned.
      * </li></ul>
      *
      * @return Pearson's r
      */
     public double getR() {
         double b1 = getSlope();
         double result = AccurateMath.sqrt(getRSquare());
         if (b1 < 0) {
             result = -result;
         }
         return result;
     }

     /**
      * Returns the <a href="http://www.xycoon.com/coefficient1.htm">
      * coefficient of determination</a>,
      * usually denoted r-square.
      * <p>
      * <strong>Preconditions</strong>: <ul>
      * <li>At least two observations (with at least two different x values)
      * must have been added before invoking this method. If this method is
      * invoked before a model can be estimated, <code>Double,NaN</code> is
      * returned.
      * </li></ul>
      *
      * @return r-square
      */
     public double getRSquare() {
         double ssto = getTotalSumSquares();
         return (ssto - getSumSquaredErrors()) / ssto;
     }

     /**
      * Returns the <a href="http://www.xycoon.com/standarderrorb0.htm">
      * standard error of the intercept estimate</a>,
      * usually denoted s(b0).
      * <p>
      * If there are fewer that <strong>three</strong> observations in the
      * model, or if there is no variation in x, this returns
      * <code>Double.NaN</code>.</p> Additionally, a <code>Double.NaN</code> is
      * returned when the intercept is constrained to be zero
      *
      * @return standard error associated with intercept estimate
      */
     public double getInterceptStdErr() {
         if( !hasIntercept ){
             return Double.NaN;
         }
         return AccurateMath.sqrt(
             getMeanSquareError() * ((1d / n) + (xbar * xbar) / sumXX));
     }

     /**
      * Returns the <a href="http://www.xycoon.com/standerrorb(1).htm">standard
      * error of the slope estimate</a>,
      * usually denoted s(b1).
      * <p>
      * If there are fewer that <strong>three</strong> data pairs in the model,
      * or if there is no variation in x, this returns <code>Double.NaN</code>.
      * </p>
      *
      * @return standard error associated with slope estimate
      */
     public double getSlopeStdErr() {
         return AccurateMath.sqrt(getMeanSquareError() / sumXX);
     }

     /**
      * Returns the half-width of a 95% confidence interval for the slope
      * estimate.
      * <p>
      * The 95% confidence interval is</p>
      * <p>
      * <code>(getSlope() - getSlopeConfidenceInterval(),
      * getSlope() + getSlopeConfidenceInterval())</code></p>
      * <p>
      * If there are fewer that <strong>three</strong> observations in the
      * model, or if there is no variation in x, this returns
      * <code>Double.NaN</code>.</p>
      * <p>
      * <strong>Usage Note</strong>:<br>
      * The validity of this statistic depends on the assumption that the
      * observations included in the model are drawn from a
      * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html">
      * Bivariate Normal Distribution</a>.</p>
      *
      * @return half-width of 95% confidence interval for the slope estimate
      * @throws OutOfRangeException if the confidence interval can not be computed.
      */
     public double getSlopeConfidenceInterval() throws OutOfRangeException {
         return getSlopeConfidenceInterval(0.05d);
     }

     /**
      * Returns the half-width of a (100-100*alpha)% confidence interval for
      * the slope estimate.
      * <p>
      * The (100-100*alpha)% confidence interval is </p>
      * <p>
      * <code>(getSlope() - getSlopeConfidenceInterval(),
      * getSlope() + getSlopeConfidenceInterval())</code></p>
      * <p>
      * To request, for example, a 99% confidence interval, use
      * <code>alpha = .01</code></p>
      * <p>
      * <strong>Usage Note</strong>:<br>
      * The validity of this statistic depends on the assumption that the
      * observations included in the model are drawn from a
      * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html">
      * Bivariate Normal Distribution</a>.</p>
      * <p>
      * <strong> Preconditions:</strong><ul>
      * <li>If there are fewer that <strong>three</strong> observations in the
      * model, or if there is no variation in x, this returns
      * <code>Double.NaN</code>.
      * </li>
      * <li>{@code (0 < alpha < 1)}; otherwise an
      * <code>OutOfRangeException</code> is thrown.
      * </li></ul>
      *
      * @param alpha the desired significance level
      * @return half-width of 95% confidence interval for the slope estimate
      * @throws OutOfRangeException if the confidence interval can not be computed.
      */
     public double getSlopeConfidenceInterval(final double alpha)
     throws OutOfRangeException {
         if (n < 3) {
             return Double.NaN;
         }
         if (alpha >= 1 || alpha <= 0) {
             throw new OutOfRangeException(LocalizedFormats.SIGNIFICANCE_LEVEL,
                                           alpha, 0, 1);
         }
         // No advertised NotStrictlyPositiveException here - will return NaN above
         TDistribution distribution = TDistribution.of(n - 2d);
         return getSlopeStdErr() *
             distribution.inverseCumulativeProbability(1d - alpha / 2d);
     }

     /**
      * Returns the significance level of the slope (equiv) correlation.
      * <p>
      * Specifically, the returned value is the smallest <code>alpha</code>
      * such that the slope confidence interval with significance level
      * equal to <code>alpha</code> does not include <code>0</code>.
      * On regression output, this is often denoted {@code Prob(|t| > 0)}
      * </p><p>
      * <strong>Usage Note</strong>:<br>
      * The validity of this statistic depends on the assumption that the
      * observations included in the model are drawn from a
      * <a href="http://mathworld.wolfram.com/BivariateNormalDistribution.html">
      * Bivariate Normal Distribution</a>.</p>
      * <p>
      * If there are fewer that <strong>three</strong> observations in the
      * model, or if there is no variation in x, this returns
      * <code>Double.NaN</code>.</p>
      *
      * @return significance level for slope/correlation
      * @throws org.apache.commons.math4.legacy.exception.MaxCountExceededException
      * if the significance level can not be computed.
      */
     public double getSignificance() {
         if (n < 3) {
             return Double.NaN;
         }
         // No advertised NotStrictlyPositiveException here - will return NaN above
         TDistribution distribution = TDistribution.of(n - 2d);
         return 2d * (1.0 - distribution.cumulativeProbability(
                     AccurateMath.abs(getSlope()) / getSlopeStdErr()));
     }

     // ---------------------Private methods-----------------------------------

     /**
     * Returns the intercept of the estimated regression line, given the slope.
     * <p>
     * Will return <code>NaN</code> if slope is <code>NaN</code>.</p>
     *
     * @param slope current slope
     * @return the intercept of the regression line
     */
     private double getIntercept(final double slope) {
       if( hasIntercept){
         return (sumY - slope * sumX) / n;
       }
       return 0.0;
     }

     /**
      * Computes SSR from b1.
      *
      * @param slope regression slope estimate
      * @return sum of squared deviations of predicted y values
      */
     private double getRegressionSumSquares(final double slope) {
         return slope * slope * sumXX;
     }

     /**
      * Performs a regression on data present in buffers and outputs a RegressionResults object.
      *
      * <p>If there are fewer than 3 observations in the model and {@code hasIntercept} is true
      * a {@code NoDataException} is thrown.  If there is no intercept term, the model must
      * contain at least 2 observations.</p>
      *
      * @return RegressionResults acts as a container of regression output
      * @throws ModelSpecificationException if the model is not correctly specified
      * @throws NoDataException if there is not sufficient data in the model to
      * estimate the regression parameters
      */
     @Override
     public RegressionResults regress() throws ModelSpecificationException, NoDataException {
         if (hasIntercept) {
             if (n < 3) {
                 throw new NoDataException(LocalizedFormats.NOT_ENOUGH_DATA_REGRESSION);
             }
             if (AccurateMath.abs(sumXX) > Precision.SAFE_MIN) {
                 final double[] params = new double[] { getIntercept(), getSlope() };
                 final double mse = getMeanSquareError();
                 final double syy = sumYY + sumY * sumY / n;
                 final double[] vcv = new double[] { mse * (xbar * xbar / sumXX + 1.0 / n), -xbar * mse / sumXX, mse / sumXX };
                 return new RegressionResults(params, new double[][] { vcv }, true, n, 2, sumY, syy, getSumSquaredErrors(), true,
                         false);
             } else {
                 final double[] params = new double[] { sumY / n, Double.NaN };
                 // final double mse = getMeanSquareError();
                 final double[] vcv = new double[] { ybar / (n - 1.0), Double.NaN, Double.NaN };
                 return new RegressionResults(params, new double[][] { vcv }, true, n, 1, sumY, sumYY, getSumSquaredErrors(), true,
                         false);
             }
         } else {
             if (n < 2) {
                 throw new NoDataException(LocalizedFormats.NOT_ENOUGH_DATA_REGRESSION);
             }
             if (!Double.isNaN(sumXX)) {
                 final double[] vcv = new double[] { getMeanSquareError() / sumXX };
                 final double[] params = new double[] { sumXY / sumXX };
                 return new RegressionResults(params, new double[][] { vcv }, true, n, 1, sumY, sumYY, getSumSquaredErrors(), false,
                         false);
             } else {
                 final double[] vcv = new double[] { Double.NaN };
                 final double[] params = new double[] { Double.NaN };
                 return new RegressionResults(params, new double[][] { vcv }, true, n, 1, Double.NaN, Double.NaN, Double.NaN, false,
                         false);
             }
         }
     }

     /**
      * Performs a regression on data present in buffers including only regressors.
      * indexed in variablesToInclude and outputs a RegressionResults object
      * @param variablesToInclude an array of indices of regressors to include
      * @return RegressionResults acts as a container of regression output
      * @throws MathIllegalArgumentException if the variablesToInclude array is null or zero length
      * @throws OutOfRangeException if a requested variable is not present in model
      */
     @Override
     public RegressionResults regress(int[] variablesToInclude) throws MathIllegalArgumentException{
         if( variablesToInclude == null || variablesToInclude.length == 0){
           throw new MathIllegalArgumentException(LocalizedFormats.ARRAY_ZERO_LENGTH_OR_NULL_NOT_ALLOWED);
         }
         if( variablesToInclude.length > 2 || (variablesToInclude.length > 1 && !hasIntercept) ){
             throw new ModelSpecificationException(
                     LocalizedFormats.ARRAY_SIZE_EXCEEDS_MAX_VARIABLES,
                     (variablesToInclude.length > 1 && !hasIntercept) ? 1 : 2);
         }

         if( hasIntercept ){
             if( variablesToInclude.length == 2 ){
                 if( variablesToInclude[0] == 1 ){
                     throw new ModelSpecificationException(LocalizedFormats.NOT_INCREASING_SEQUENCE);
                 }else if( variablesToInclude[0] != 0 ){
                     throw new OutOfRangeException( variablesToInclude[0], 0,1 );
                 }
                 if( variablesToInclude[1] != 1){
                      throw new OutOfRangeException( variablesToInclude[0], 0,1 );
                 }
                 return regress();
             }else{
                 if( variablesToInclude[0] != 1 && variablesToInclude[0] != 0 ){
                      throw new OutOfRangeException( variablesToInclude[0],0,1 );
                 }
                 final double mean = sumY * sumY / n;
                 final double syy = sumYY + mean;
                 if( variablesToInclude[0] == 0 ){
                     //just the mean
                     final double[] vcv = new double[]{ sumYY/(((n-1)*n)) };
                     final double[] params = new double[]{ ybar };
                     return new RegressionResults(
                       params, new double[][]{vcv}, true, n, 1,
                       sumY, syy+mean, sumYY,true,false);

                 }else if( variablesToInclude[0] == 1){
                     //final double _syy = sumYY + sumY * sumY / ((double) n);
                     final double sxx = sumXX + sumX * sumX / n;
                     final double sxy = sumXY + sumX * sumY / n;
                     final double sse = AccurateMath.max(0d, syy - sxy * sxy / sxx);
                     final double mse = sse/((n-1));
                     if( !Double.isNaN(sxx) ){
                         final double[] vcv = new double[]{ mse / sxx };
                         final double[] params = new double[]{ sxy/sxx };
                         return new RegressionResults(
                                     params, new double[][]{vcv}, true, n, 1,
                                     sumY, syy, sse,false,false);
                     }else{
                         final double[] vcv = new double[]{Double.NaN };
                         final double[] params = new double[]{ Double.NaN };
                         return new RegressionResults(
                                     params, new double[][]{vcv}, true, n, 1,
                                     Double.NaN, Double.NaN, Double.NaN,false,false);
                     }
                 }
             }
         }else{
             if( variablesToInclude[0] != 0 ){
                 throw new OutOfRangeException(variablesToInclude[0],0,0);
             }
             return regress();
         }

         return null;
     }
 }