| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.commons.math4.legacy.stat.regression; |
| |
| import java.util.Arrays; |
| |
| import org.apache.commons.math4.legacy.exception.util.LocalizedFormats; |
| import org.apache.commons.math4.core.jdkmath.JdkMath; |
| import org.apache.commons.numbers.core.Precision; |
| |
| /** |
| * This class is a concrete implementation of the {@link UpdatingMultipleLinearRegression} interface. |
| * |
| * <p>The algorithm is described in: <pre> |
| * Algorithm AS 274: Least Squares Routines to Supplement Those of Gentleman |
| * Author(s): Alan J. Miller |
| * Source: Journal of the Royal Statistical Society. |
| * Series C (Applied Statistics), Vol. 41, No. 2 |
| * (1992), pp. 458-478 |
| * Published by: Blackwell Publishing for the Royal Statistical Society |
| * Stable URL: http://www.jstor.org/stable/2347583 </pre> |
| * |
| * <p>This method for multiple regression forms the solution to the OLS problem |
| * by updating the QR decomposition as described by Gentleman.</p> |
| * |
| * @since 3.0 |
| */ |
| public class MillerUpdatingRegression implements UpdatingMultipleLinearRegression { |
| |
    /** Number of variables in the regression, including the constant column when an intercept is used. */
    private final int nvars;
    /** Diagonals of the cross-products matrix (the D scale factors of the decomposition). */
    private final double[] d;
    /** The elements of R'y: projections of the regressand onto the orthogonalized regressors. */
    private final double[] rhs;
    /** The off-diagonal portion of the R matrix, packed row-wise as an upper triangle. */
    private final double[] r;
    /** The singularity tolerance for each of the variables, filled by {@link #tolset()}. */
    private final double[] tol;
    /** Residual sum of squares for all nested regressions, filled by {@link #ss()}. */
    private final double[] rss;
    /** Current order of the regressors: a permutation of {@code 0..nvars-1}. */
    private final int[] vorder;
    /** Scratch space for the tolerance calculation in {@link #tolset()}. */
    private final double[] workTolset;
    /** Number of observations entered so far. */
    private long nobs;
    /** Sum of squared errors of the largest (full) regression. */
    private double sserr;
    /** Whether {@link #ss()} has been run since the last data update. */
    private boolean rssSet;
    /** Whether {@link #tolset()} has been run since the last data update. */
    private boolean tolSet;
    /** Flags for variables found to be linearly dependent on earlier ones. */
    private final boolean[] lindep;
    /** Scratch row used when re-including a singular column in {@link #singcheck()}. */
    private final double[] xSing;
    /** Workspace for the singularity check in {@link #singcheck()}. */
    private final double[] workSing;
    /** Running summation of the Y variable. */
    private double sumy;
    /** Running summation of squared Y values. */
    private double sumsqy;
    /** Whether a regression constant (intercept) is added automatically. */
    private final boolean hasIntercept;
    /** Zero tolerance used in singularity and pivoting decisions. */
    private final double epsilon;
| /** |
| * Set the default constructor to private access. |
| * to prevent inadvertent instantiation |
| */ |
| @SuppressWarnings("unused") |
| private MillerUpdatingRegression() { |
| this(-1, false, Double.NaN); |
| } |
| |
| /** |
| * This is the augmented constructor for the MillerUpdatingRegression class. |
| * |
| * @param numberOfVariables number of regressors to expect, not including constant |
| * @param includeConstant include a constant automatically |
| * @param errorTolerance zero tolerance, how machine zero is determined |
| * @throws ModelSpecificationException if {@code numberOfVariables is less than 1} |
| */ |
| public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant, double errorTolerance) |
| throws ModelSpecificationException { |
| if (numberOfVariables < 1) { |
| throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS); |
| } |
| if (includeConstant) { |
| this.nvars = numberOfVariables + 1; |
| } else { |
| this.nvars = numberOfVariables; |
| } |
| this.hasIntercept = includeConstant; |
| this.nobs = 0; |
| this.d = new double[this.nvars]; |
| this.rhs = new double[this.nvars]; |
| this.r = new double[this.nvars * (this.nvars - 1) / 2]; |
| this.tol = new double[this.nvars]; |
| this.rss = new double[this.nvars]; |
| this.vorder = new int[this.nvars]; |
| this.xSing = new double[this.nvars]; |
| this.workSing = new double[this.nvars]; |
| this.workTolset = new double[this.nvars]; |
| this.lindep = new boolean[this.nvars]; |
| for (int i = 0; i < this.nvars; i++) { |
| vorder[i] = i; |
| } |
| if (errorTolerance > 0) { |
| this.epsilon = errorTolerance; |
| } else { |
| this.epsilon = -errorTolerance; |
| } |
| } |
| |
| /** |
| * Primary constructor for the MillerUpdatingRegression. |
| * |
| * @param numberOfVariables maximum number of potential regressors |
| * @param includeConstant include a constant automatically |
| * @throws ModelSpecificationException if {@code numberOfVariables is less than 1} |
| */ |
| public MillerUpdatingRegression(int numberOfVariables, boolean includeConstant) |
| throws ModelSpecificationException { |
| this(numberOfVariables, includeConstant, Precision.EPSILON); |
| } |
| |
| /** |
| * A getter method which determines whether a constant is included. |
| * @return true regression has an intercept, false no intercept |
| */ |
| @Override |
| public boolean hasIntercept() { |
| return this.hasIntercept; |
| } |
| |
| /** |
| * Gets the number of observations added to the regression model. |
| * @return number of observations |
| */ |
| @Override |
| public long getN() { |
| return this.nobs; |
| } |
| |
| /** |
| * Adds an observation to the regression model. |
| * @param x the array with regressor values |
| * @param y the value of dependent variable given these regressors |
| * @exception ModelSpecificationException if the length of {@code x} does not equal |
| * the number of independent variables in the model |
| */ |
| @Override |
| public void addObservation(final double[] x, final double y) |
| throws ModelSpecificationException { |
| |
| if ((!this.hasIntercept && x.length != nvars) || |
| (this.hasIntercept && x.length + 1 != nvars)) { |
| throw new ModelSpecificationException(LocalizedFormats.INVALID_REGRESSION_OBSERVATION, |
| x.length, nvars); |
| } |
| if (!this.hasIntercept) { |
| include(Arrays.copyOf(x, x.length), 1.0, y); |
| } else { |
| final double[] tmp = new double[x.length + 1]; |
| System.arraycopy(x, 0, tmp, 1, x.length); |
| tmp[0] = 1.0; |
| include(tmp, 1.0, y); |
| } |
| ++nobs; |
| |
| } |
| |
| /** |
| * Adds multiple observations to the model. |
| * @param x observations on the regressors |
| * @param y observations on the regressand |
| * @throws ModelSpecificationException if {@code x} is not rectangular, does not match |
| * the length of {@code y} or does not contain sufficient data to estimate the model |
| */ |
| @Override |
| public void addObservations(double[][] x, double[] y) throws ModelSpecificationException { |
| if (x == null || y == null || x.length != y.length) { |
| throw new ModelSpecificationException( |
| LocalizedFormats.DIMENSIONS_MISMATCH_SIMPLE, |
| (x == null) ? 0 : x.length, |
| (y == null) ? 0 : y.length); |
| } |
| if (x.length == 0) { // Must be no y data either |
| throw new ModelSpecificationException( |
| LocalizedFormats.NO_DATA); |
| } |
| if (x[0].length + 1 > x.length) { |
| throw new ModelSpecificationException( |
| LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS, |
| x.length, x[0].length); |
| } |
| for (int i = 0; i < x.length; i++) { |
| addObservation(x[i], y[i]); |
| } |
| } |
| |
| /** |
| * The include method is where the QR decomposition occurs. This statement forms all |
| * intermediate data which will be used for all derivative measures. |
| * According to the miller paper, note that in the original implementation the x vector |
| * is overwritten. In this implementation, the include method is passed a copy of the |
| * original data vector so that there is no contamination of the data. Additionally, |
| * this method differs slightly from Gentleman's method, in that the assumption is |
| * of dense design matrices, there is some advantage in using the original gentleman algorithm |
| * on sparse matrices. |
| * |
| * @param x observations on the regressors |
| * @param wi weight of the this observation (-1,1) |
| * @param yi observation on the regressand |
| */ |
| private void include(final double[] x, final double wi, final double yi) { |
| int nextr = 0; |
| double w = wi; |
| double y = yi; |
| double xi; |
| double di; |
| double wxi; |
| double dpi; |
| double xk; |
| double wPrev; |
| this.rssSet = false; |
| sumy = smartAdd(yi, sumy); |
| sumsqy = smartAdd(sumsqy, yi * yi); |
| for (int i = 0; i < x.length; i++) { |
| if (w == 0.0) { |
| return; |
| } |
| xi = x[i]; |
| |
| if (xi == 0.0) { |
| nextr += nvars - i - 1; |
| continue; |
| } |
| di = d[i]; |
| wxi = w * xi; |
| wPrev = w; |
| if (di != 0.0) { |
| dpi = smartAdd(di, wxi * xi); |
| final double tmp = wxi * xi / di; |
| if (JdkMath.abs(tmp) > Precision.EPSILON) { |
| w = (di * w) / dpi; |
| } |
| } else { |
| dpi = wxi * xi; |
| w = 0.0; |
| } |
| d[i] = dpi; |
| for (int k = i + 1; k < nvars; k++) { |
| xk = x[k]; |
| x[k] = smartAdd(xk, -xi * r[nextr]); |
| if (di != 0.0) { |
| r[nextr] = smartAdd(di * r[nextr], (wPrev * xi) * xk) / dpi; |
| } else { |
| r[nextr] = xk / xi; |
| } |
| ++nextr; |
| } |
| xk = y; |
| y = smartAdd(xk, -xi * rhs[i]); |
| if (di != 0.0) { |
| rhs[i] = smartAdd(di * rhs[i], wxi * xk) / dpi; |
| } else { |
| rhs[i] = xk / xi; |
| } |
| } |
| sserr = smartAdd(sserr, w * y * y); |
| } |
| |
| /** |
| * Adds to number a and b such that the contamination due to |
| * numerical smallness of one addend does not corrupt the sum. |
| * @param a - an addend |
| * @param b - an addend |
| * @return the sum of the a and b |
| */ |
| private double smartAdd(double a, double b) { |
| final double aa = JdkMath.abs(a); |
| final double ba = JdkMath.abs(b); |
| if (aa > ba) { |
| final double eps = aa * Precision.EPSILON; |
| if (ba > eps) { |
| return a + b; |
| } |
| return a; |
| } else { |
| final double eps = ba * Precision.EPSILON; |
| if (aa > eps) { |
| return a + b; |
| } |
| return b; |
| } |
| } |
| |
| /** |
| * As the name suggests, clear wipes the internals and reorders everything in the |
| * canonical order. |
| */ |
| @Override |
| public void clear() { |
| Arrays.fill(this.d, 0.0); |
| Arrays.fill(this.rhs, 0.0); |
| Arrays.fill(this.r, 0.0); |
| Arrays.fill(this.tol, 0.0); |
| Arrays.fill(this.rss, 0.0); |
| Arrays.fill(this.workTolset, 0.0); |
| Arrays.fill(this.workSing, 0.0); |
| Arrays.fill(this.xSing, 0.0); |
| Arrays.fill(this.lindep, false); |
| for (int i = 0; i < nvars; i++) { |
| this.vorder[i] = i; |
| } |
| this.nobs = 0; |
| this.sserr = 0.0; |
| this.sumy = 0.0; |
| this.sumsqy = 0.0; |
| this.rssSet = false; |
| this.tolSet = false; |
| } |
| |
| /** |
| * This sets up tolerances for singularity testing. |
| */ |
| private void tolset() { |
| int pos; |
| double total; |
| final double eps = this.epsilon; |
| for (int i = 0; i < nvars; i++) { |
| this.workTolset[i] = JdkMath.sqrt(d[i]); |
| } |
| tol[0] = eps * this.workTolset[0]; |
| for (int col = 1; col < nvars; col++) { |
| pos = col - 1; |
| total = workTolset[col]; |
| for (int row = 0; row < col; row++) { |
| total += JdkMath.abs(r[pos]) * workTolset[row]; |
| pos += nvars - row - 2; |
| } |
| tol[col] = eps * total; |
| } |
| tolSet = true; |
| } |
| |
| /** |
| * The regcf method conducts the linear regression and extracts the |
| * parameter vector. Notice that the algorithm can do subset regression |
| * with no alteration. |
| * |
| * @param nreq how many of the regressors to include (either in canonical |
| * order, or in the current reordered state) |
| * @return an array with the estimated slope coefficients |
| * @throws ModelSpecificationException if {@code nreq} is less than 1 |
| * or greater than the number of independent variables |
| */ |
| private double[] regcf(int nreq) throws ModelSpecificationException { |
| int nextr; |
| if (nreq < 1) { |
| throw new ModelSpecificationException(LocalizedFormats.NO_REGRESSORS); |
| } |
| if (nreq > this.nvars) { |
| throw new ModelSpecificationException( |
| LocalizedFormats.TOO_MANY_REGRESSORS, nreq, this.nvars); |
| } |
| if (!this.tolSet) { |
| tolset(); |
| } |
| final double[] ret = new double[nreq]; |
| boolean rankProblem = false; |
| for (int i = nreq - 1; i > -1; i--) { |
| if (JdkMath.sqrt(d[i]) < tol[i]) { |
| ret[i] = 0.0; |
| d[i] = 0.0; |
| rankProblem = true; |
| } else { |
| ret[i] = rhs[i]; |
| nextr = i * (nvars + nvars - i - 1) / 2; |
| for (int j = i + 1; j < nreq; j++) { |
| ret[i] = smartAdd(ret[i], -r[nextr] * ret[j]); |
| ++nextr; |
| } |
| } |
| } |
| if (rankProblem) { |
| for (int i = 0; i < nreq; i++) { |
| if (this.lindep[i]) { |
| ret[i] = Double.NaN; |
| } |
| } |
| } |
| return ret; |
| } |
| |
| /** |
| * The method which checks for singularities and then eliminates the offending |
| * columns. |
| */ |
| private void singcheck() { |
| int pos; |
| for (int i = 0; i < nvars; i++) { |
| workSing[i] = JdkMath.sqrt(d[i]); |
| } |
| for (int col = 0; col < nvars; col++) { |
| // Set elements within R to zero if they are less than tol(col) in |
| // absolute value after being scaled by the square root of their row |
| // multiplier |
| final double temp = tol[col]; |
| pos = col - 1; |
| for (int row = 0; row < col - 1; row++) { |
| if (JdkMath.abs(r[pos]) * workSing[row] < temp) { |
| r[pos] = 0.0; |
| } |
| pos += nvars - row - 2; |
| } |
| // If diagonal element is near zero, set it to zero, set appropriate |
| // element of LINDEP, and use INCLUD to augment the projections in |
| // the lower rows of the orthogonalization. |
| lindep[col] = false; |
| if (workSing[col] < temp) { |
| lindep[col] = true; |
| if (col < nvars - 1) { |
| Arrays.fill(xSing, 0.0); |
| int pi = col * (nvars + nvars - col - 1) / 2; |
| for (int xi = col + 1; xi < nvars; xi++, pi++) { |
| xSing[xi] = r[pi]; |
| r[pi] = 0.0; |
| } |
| final double y = rhs[col]; |
| final double weight = d[col]; |
| d[col] = 0.0; |
| rhs[col] = 0.0; |
| this.include(xSing, weight, y); |
| } else { |
| sserr += d[col] * rhs[col] * rhs[col]; |
| } |
| } |
| } |
| } |
| |
| /** |
| * Calculates the sum of squared errors for the full regression. |
| * and all subsets in the following manner: <pre> |
| * rss[] ={ |
| * ResidualSumOfSquares_allNvars, |
| * ResidualSumOfSquares_FirstNvars-1, |
| * ResidualSumOfSquares_FirstNvars-2, |
| * ..., ResidualSumOfSquares_FirstVariable} </pre> |
| */ |
| private void ss() { |
| double total = sserr; |
| rss[nvars - 1] = sserr; |
| for (int i = nvars - 1; i > 0; i--) { |
| total += d[i] * rhs[i] * rhs[i]; |
| rss[i - 1] = total; |
| } |
| rssSet = true; |
| } |
| |
| /** |
| * Calculates the cov matrix assuming only the first nreq variables are |
| * included in the calculation. The returned array contains a symmetric |
| * matrix stored in lower triangular form. The matrix will have |
| * ( nreq + 1 ) * nreq / 2 elements. For illustration <pre> |
| * cov = |
| * { |
| * cov_00, |
| * cov_10, cov_11, |
| * cov_20, cov_21, cov22, |
| * ... |
| * } </pre> |
| * |
| * @param nreq how many of the regressors to include (either in canonical |
| * order, or in the current reordered state) |
| * @return an array with the variance covariance of the included |
| * regressors in lower triangular form |
| */ |
| private double[] cov(int nreq) { |
| if (this.nobs <= nreq) { |
| return null; |
| } |
| double rnk = 0.0; |
| for (int i = 0; i < nreq; i++) { |
| if (!this.lindep[i]) { |
| rnk += 1.0; |
| } |
| } |
| final double var = rss[nreq - 1] / (nobs - rnk); |
| final double[] rinv = new double[nreq * (nreq - 1) / 2]; |
| inverse(rinv, nreq); |
| final double[] covmat = new double[nreq * (nreq + 1) / 2]; |
| Arrays.fill(covmat, Double.NaN); |
| int pos2; |
| int pos1; |
| int start = 0; |
| double total = 0; |
| for (int row = 0; row < nreq; row++) { |
| pos2 = start; |
| if (!this.lindep[row]) { |
| for (int col = row; col < nreq; col++) { |
| if (!this.lindep[col]) { |
| pos1 = start + col - row; |
| if (row == col) { |
| total = 1.0 / d[col]; |
| } else { |
| total = rinv[pos1 - 1] / d[col]; |
| } |
| for (int k = col + 1; k < nreq; k++) { |
| if (!this.lindep[k]) { |
| total += rinv[pos1] * rinv[pos2] / d[k]; |
| } |
| ++pos1; |
| ++pos2; |
| } |
| covmat[ (col + 1) * col / 2 + row] = total * var; |
| } else { |
| pos2 += nreq - col - 1; |
| } |
| } |
| } |
| start += nreq - row - 1; |
| } |
| return covmat; |
| } |
| |
| /** |
| * This internal method calculates the inverse of the upper-triangular portion |
| * of the R matrix. |
| * @param rinv the storage for the inverse of r |
| * @param nreq how many of the regressors to include (either in canonical |
| * order, or in the current reordered state) |
| */ |
| private void inverse(double[] rinv, int nreq) { |
| int pos = nreq * (nreq - 1) / 2 - 1; |
| int pos1 = -1; |
| int pos2 = -1; |
| double total = 0.0; |
| Arrays.fill(rinv, Double.NaN); |
| for (int row = nreq - 1; row > 0; --row) { |
| if (!this.lindep[row]) { |
| final int start = (row - 1) * (nvars + nvars - row) / 2; |
| for (int col = nreq; col > row; --col) { |
| pos1 = start; |
| pos2 = pos; |
| total = 0.0; |
| for (int k = row; k < col - 1; k++) { |
| pos2 += nreq - k - 1; |
| if (!this.lindep[k]) { |
| total += -r[pos1] * rinv[pos2]; |
| } |
| ++pos1; |
| } |
| rinv[pos] = total - r[pos1]; |
| --pos; |
| } |
| } else { |
| pos -= nreq - row; |
| } |
| } |
| } |
| |
| /** |
| * In the original algorithm only the partial correlations of the regressors |
| * is returned to the user. In this implementation, we have <pre> |
| * corr = |
| * { |
| * corrxx - lower triangular |
| * corrxy - bottom row of the matrix |
| * } |
| * Replaces subroutines PCORR and COR of: |
| * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2 </pre> |
| * |
| * <p>Calculate partial correlations after the variables in rows |
| * 1, 2, ..., IN have been forced into the regression. |
| * If IN = 1, and the first row of R represents a constant in the |
| * model, then the usual simple correlations are returned.</p> |
| * |
| * <p>If IN = 0, the value returned in array CORMAT for the correlation |
| * of variables Xi & Xj is: <pre> |
| * sum ( Xi.Xj ) / Sqrt ( sum (Xi^2) . sum (Xj^2) )</pre> |
| * |
| * <p>On return, array CORMAT contains the upper triangle of the matrix of |
| * partial correlations stored by rows, excluding the 1's on the diagonal. |
| * e.g. if IN = 2, the consecutive elements returned are: |
| * (3,4) (3,5) ... (3,ncol), (4,5) (4,6) ... (4,ncol), etc. |
| * Array YCORR stores the partial correlations with the Y-variable |
| * starting with YCORR(IN+1) = partial correlation with the variable in |
| * position (IN+1). </p> |
| * |
| * @param in how many of the regressors to include (either in canonical |
| * order, or in the current reordered state) |
| * @return an array with the partial correlations of the remainder of |
| * regressors with each other and the regressand, in lower triangular form |
| */ |
| public double[] getPartialCorrelations(int in) { |
| final double[] output = new double[(nvars - in + 1) * (nvars - in) / 2]; |
| int pos; |
| int pos1; |
| int pos2; |
| final int rmsOff = -in; |
| final int wrkOff = -(in + 1); |
| final double[] rms = new double[nvars - in]; |
| final double[] work = new double[nvars - in - 1]; |
| double sumxx; |
| double sumxy; |
| double sumyy; |
| final int offXX = (nvars - in) * (nvars - in - 1) / 2; |
| if (in < -1 || in >= nvars) { |
| return null; |
| } |
| final int nvm = nvars - 1; |
| final int basePos = r.length - (nvm - in) * (nvm - in + 1) / 2; |
| if (d[in] > 0.0) { |
| rms[in + rmsOff] = 1.0 / JdkMath.sqrt(d[in]); |
| } |
| for (int col = in + 1; col < nvars; col++) { |
| pos = basePos + col - 1 - in; |
| sumxx = d[col]; |
| for (int row = in; row < col; row++) { |
| sumxx += d[row] * r[pos] * r[pos]; |
| pos += nvars - row - 2; |
| } |
| if (sumxx > 0.0) { |
| rms[col + rmsOff] = 1.0 / JdkMath.sqrt(sumxx); |
| } else { |
| rms[col + rmsOff] = 0.0; |
| } |
| } |
| sumyy = sserr; |
| for (int row = in; row < nvars; row++) { |
| sumyy += d[row] * rhs[row] * rhs[row]; |
| } |
| if (sumyy > 0.0) { |
| sumyy = 1.0 / JdkMath.sqrt(sumyy); |
| } |
| pos = 0; |
| for (int col1 = in; col1 < nvars; col1++) { |
| sumxy = 0.0; |
| Arrays.fill(work, 0.0); |
| pos1 = basePos + col1 - in - 1; |
| for (int row = in; row < col1; row++) { |
| pos2 = pos1 + 1; |
| for (int col2 = col1 + 1; col2 < nvars; col2++) { |
| work[col2 + wrkOff] += d[row] * r[pos1] * r[pos2]; |
| pos2++; |
| } |
| sumxy += d[row] * r[pos1] * rhs[row]; |
| pos1 += nvars - row - 2; |
| } |
| pos2 = pos1 + 1; |
| for (int col2 = col1 + 1; col2 < nvars; col2++) { |
| work[col2 + wrkOff] += d[col1] * r[pos2]; |
| ++pos2; |
| output[ (col2 - 1 - in) * (col2 - in) / 2 + col1 - in] = |
| work[col2 + wrkOff] * rms[col1 + rmsOff] * rms[col2 + rmsOff]; |
| ++pos; |
| } |
| sumxy += d[col1] * rhs[col1]; |
| output[col1 + rmsOff + offXX] = sumxy * rms[col1 + rmsOff] * sumyy; |
| } |
| |
| return output; |
| } |
| |
| /** |
| * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2. |
| * Move variable from position FROM to position TO in an |
| * orthogonal reduction produced by AS75.1. |
| * |
| * @param from initial position |
| * @param to destination |
| */ |
| private void vmove(int from, int to) { |
| double d1; |
| double d2; |
| double x; |
| double d1new; |
| double d2new; |
| double cbar; |
| double sbar; |
| double y; |
| int first; |
| int inc; |
| int m1; |
| int m2; |
| int mp1; |
| int pos; |
| boolean bSkipTo40 = false; |
| if (from == to) { |
| return; |
| } |
| if (!this.rssSet) { |
| ss(); |
| } |
| int count = 0; |
| if (from < to) { |
| first = from; |
| inc = 1; |
| count = to - from; |
| } else { |
| first = from - 1; |
| inc = -1; |
| count = from - to; |
| } |
| |
| int m = first; |
| int idx = 0; |
| while (idx < count) { |
| m1 = m * (nvars + nvars - m - 1) / 2; |
| m2 = m1 + nvars - m - 1; |
| mp1 = m + 1; |
| |
| d1 = d[m]; |
| d2 = d[mp1]; |
| // Special cases. |
| if (d1 > this.epsilon || d2 > this.epsilon) { |
| x = r[m1]; |
| if (JdkMath.abs(x) * JdkMath.sqrt(d1) < tol[mp1]) { |
| x = 0.0; |
| } |
| if (d1 < this.epsilon || JdkMath.abs(x) < this.epsilon) { |
| d[m] = d2; |
| d[mp1] = d1; |
| r[m1] = 0.0; |
| for (int col = m + 2; col < nvars; col++) { |
| ++m1; |
| x = r[m1]; |
| r[m1] = r[m2]; |
| r[m2] = x; |
| ++m2; |
| } |
| x = rhs[m]; |
| rhs[m] = rhs[mp1]; |
| rhs[mp1] = x; |
| bSkipTo40 = true; |
| //break; |
| } else if (d2 < this.epsilon) { |
| d[m] = d1 * x * x; |
| r[m1] = 1.0 / x; |
| for (int i = m1 + 1; i < m1 + nvars - m - 1; i++) { |
| r[i] /= x; |
| } |
| rhs[m] /= x; |
| bSkipTo40 = true; |
| //break; |
| } |
| if (!bSkipTo40) { |
| d1new = d2 + d1 * x * x; |
| cbar = d2 / d1new; |
| sbar = x * d1 / d1new; |
| d2new = d1 * cbar; |
| d[m] = d1new; |
| d[mp1] = d2new; |
| r[m1] = sbar; |
| for (int col = m + 2; col < nvars; col++) { |
| ++m1; |
| y = r[m1]; |
| r[m1] = cbar * r[m2] + sbar * y; |
| r[m2] = y - x * r[m2]; |
| ++m2; |
| } |
| y = rhs[m]; |
| rhs[m] = cbar * rhs[mp1] + sbar * y; |
| rhs[mp1] = y - x * rhs[mp1]; |
| } |
| } |
| if (m > 0) { |
| pos = m; |
| for (int row = 0; row < m; row++) { |
| x = r[pos]; |
| r[pos] = r[pos - 1]; |
| r[pos - 1] = x; |
| pos += nvars - row - 2; |
| } |
| } |
| // Adjust variable order (VORDER), the tolerances (TOL) and |
| // the vector of residual sums of squares (RSS). |
| m1 = vorder[m]; |
| vorder[m] = vorder[mp1]; |
| vorder[mp1] = m1; |
| x = tol[m]; |
| tol[m] = tol[mp1]; |
| tol[mp1] = x; |
| rss[m] = rss[mp1] + d[mp1] * rhs[mp1] * rhs[mp1]; |
| |
| m += inc; |
| ++idx; |
| } |
| } |
| |
| /** |
| * ALGORITHM AS274 APPL. STATIST. (1992) VOL.41, NO. 2 |
| * |
| * <p> Re-order the variables in an orthogonal reduction produced by |
| * AS75.1 so that the N variables in LIST start at position POS1, |
| * though will not necessarily be in the same order as in LIST. |
| * Any variables in VORDER before position POS1 are not moved. |
| * Auxiliary routine called: VMOVE. </p> |
| * |
| * <p>This internal method reorders the regressors.</p> |
| * |
| * @param list the regressors to move |
| * @param pos1 where the list will be placed |
| * @return -1 error, 0 everything ok |
| */ |
| private int reorderRegressors(int[] list, int pos1) { |
| int next; |
| int i; |
| int l; |
| if (list.length < 1 || list.length > nvars + 1 - pos1) { |
| return -1; |
| } |
| next = pos1; |
| i = pos1; |
| while (i < nvars) { |
| l = vorder[i]; |
| for (int j = 0; j < list.length; j++) { |
| if (l == list[j] && i > next) { |
| this.vmove(i, next); |
| ++next; |
| if (next >= list.length + pos1) { |
| return 0; |
| } else { |
| break; |
| } |
| } |
| } |
| ++i; |
| } |
| return 0; |
| } |
| |
| /** |
| * Gets the diagonal of the Hat matrix also known as the leverage matrix. |
| * |
| * @param rowData returns the diagonal of the hat matrix for this observation |
| * @return the diagonal element of the hatmatrix |
| */ |
| public double getDiagonalOfHatMatrix(double[] rowData) { |
| double[] wk = new double[this.nvars]; |
| int pos; |
| double total; |
| |
| if (rowData.length > nvars) { |
| return Double.NaN; |
| } |
| double[] xrow; |
| if (this.hasIntercept) { |
| xrow = new double[rowData.length + 1]; |
| xrow[0] = 1.0; |
| System.arraycopy(rowData, 0, xrow, 1, rowData.length); |
| } else { |
| xrow = rowData; |
| } |
| double hii = 0.0; |
| for (int col = 0; col < xrow.length; col++) { |
| if (JdkMath.sqrt(d[col]) < tol[col]) { |
| wk[col] = 0.0; |
| } else { |
| pos = col - 1; |
| total = xrow[col]; |
| for (int row = 0; row < col; row++) { |
| total = smartAdd(total, -wk[row] * r[pos]); |
| pos += nvars - row - 2; |
| } |
| wk[col] = total; |
| hii = smartAdd(hii, (total * total) / d[col]); |
| } |
| } |
| return hii; |
| } |
| |
| /** |
| * Gets the order of the regressors, useful if some type of reordering |
| * has been called. Calling regress with int[]{} args will trigger |
| * a reordering. |
| * |
| * @return int[] with the current order of the regressors |
| */ |
| public int[] getOrderOfRegressors(){ |
| return Arrays.copyOf(vorder, vorder.length); |
| } |
| |
| /** |
| * Conducts a regression on the data in the model, using all regressors. |
| * |
| * @return RegressionResults the structure holding all regression results |
| * @exception ModelSpecificationException - thrown if number of observations is |
| * less than the number of variables |
| */ |
| @Override |
| public RegressionResults regress() throws ModelSpecificationException { |
| return regress(this.nvars); |
| } |
| |
| /** |
| * Conducts a regression on the data in the model, using a subset of regressors. |
| * |
| * @param numberOfRegressors many of the regressors to include (either in canonical |
| * order, or in the current reordered state) |
| * @return RegressionResults the structure holding all regression results |
| * @exception ModelSpecificationException - thrown if number of observations is |
| * less than the number of variables or number of regressors requested |
| * is greater than the regressors in the model |
| */ |
| public RegressionResults regress(int numberOfRegressors) throws ModelSpecificationException { |
| if (this.nobs <= numberOfRegressors) { |
| throw new ModelSpecificationException( |
| LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS, |
| this.nobs, numberOfRegressors); |
| } |
| if( numberOfRegressors > this.nvars ){ |
| throw new ModelSpecificationException( |
| LocalizedFormats.TOO_MANY_REGRESSORS, numberOfRegressors, this.nvars); |
| } |
| |
| tolset(); |
| singcheck(); |
| |
| double[] beta = this.regcf(numberOfRegressors); |
| |
| ss(); |
| |
| double[] cov = this.cov(numberOfRegressors); |
| |
| int rnk = 0; |
| for (int i = 0; i < this.lindep.length; i++) { |
| if (!this.lindep[i]) { |
| ++rnk; |
| } |
| } |
| |
| boolean needsReorder = false; |
| for (int i = 0; i < numberOfRegressors; i++) { |
| if (this.vorder[i] != i) { |
| needsReorder = true; |
| break; |
| } |
| } |
| if (!needsReorder) { |
| return new RegressionResults( |
| beta, new double[][]{cov}, true, this.nobs, rnk, |
| this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false); |
| } else { |
| double[] betaNew = new double[beta.length]; |
| double[] covNew = new double[cov.length]; |
| |
| int[] newIndices = new int[beta.length]; |
| for (int i = 0; i < nvars; i++) { |
| for (int j = 0; j < numberOfRegressors; j++) { |
| if (this.vorder[j] == i) { |
| betaNew[i] = beta[ j]; |
| newIndices[i] = j; |
| } |
| } |
| } |
| |
| int idx1 = 0; |
| int idx2; |
| int ii; |
| int jj; |
| for (int i = 0; i < beta.length; i++) { |
| ii = newIndices[i]; |
| for (int j = 0; j <= i; j++, idx1++) { |
| jj = newIndices[j]; |
| if (ii > jj) { |
| idx2 = ii * (ii + 1) / 2 + jj; |
| } else { |
| idx2 = jj * (jj + 1) / 2 + ii; |
| } |
| covNew[idx1] = cov[idx2]; |
| } |
| } |
| return new RegressionResults( |
| betaNew, new double[][]{covNew}, true, this.nobs, rnk, |
| this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false); |
| } |
| } |
| |
| /** |
| * Conducts a regression on the data in the model, using regressors in array |
| * Calling this method will change the internal order of the regressors |
| * and care is required in interpreting the hatmatrix. |
| * |
| * @param variablesToInclude array of variables to include in regression |
| * @return RegressionResults the structure holding all regression results |
| * @exception ModelSpecificationException - thrown if number of observations is |
| * less than the number of variables, the number of regressors requested |
| * is greater than the regressors in the model or a regressor index in |
| * regressor array does not exist |
| */ |
| @Override |
| public RegressionResults regress(int[] variablesToInclude) throws ModelSpecificationException { |
| if (variablesToInclude.length > this.nvars) { |
| throw new ModelSpecificationException( |
| LocalizedFormats.TOO_MANY_REGRESSORS, variablesToInclude.length, this.nvars); |
| } |
| if (this.nobs <= this.nvars) { |
| throw new ModelSpecificationException( |
| LocalizedFormats.NOT_ENOUGH_DATA_FOR_NUMBER_OF_PREDICTORS, |
| this.nobs, this.nvars); |
| } |
| Arrays.sort(variablesToInclude); |
| int iExclude = 0; |
| for (int i = 0; i < variablesToInclude.length; i++) { |
| if (i >= this.nvars) { |
| throw new ModelSpecificationException( |
| LocalizedFormats.INDEX_LARGER_THAN_MAX, i, this.nvars); |
| } |
| if (i > 0 && variablesToInclude[i] == variablesToInclude[i - 1]) { |
| variablesToInclude[i] = -1; |
| ++iExclude; |
| } |
| } |
| int[] series; |
| if (iExclude > 0) { |
| int j = 0; |
| series = new int[variablesToInclude.length - iExclude]; |
| for (int i = 0; i < variablesToInclude.length; i++) { |
| if (variablesToInclude[i] > -1) { |
| series[j] = variablesToInclude[i]; |
| ++j; |
| } |
| } |
| } else { |
| series = variablesToInclude; |
| } |
| |
| reorderRegressors(series, 0); |
| tolset(); |
| singcheck(); |
| |
| double[] beta = this.regcf(series.length); |
| |
| ss(); |
| |
| double[] cov = this.cov(series.length); |
| |
| int rnk = 0; |
| for (int i = 0; i < this.lindep.length; i++) { |
| if (!this.lindep[i]) { |
| ++rnk; |
| } |
| } |
| |
| boolean needsReorder = false; |
| for (int i = 0; i < series.length; i++) { |
| if (this.vorder[i] != series[i]) { |
| needsReorder = true; |
| break; |
| } |
| } |
| if (!needsReorder) { |
| return new RegressionResults( |
| beta, new double[][]{cov}, true, this.nobs, rnk, |
| this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false); |
| } else { |
| double[] betaNew = new double[beta.length]; |
| int[] newIndices = new int[beta.length]; |
| for (int i = 0; i < series.length; i++) { |
| for (int j = 0; j < this.vorder.length; j++) { |
| if (this.vorder[j] == series[i]) { |
| betaNew[i] = beta[ j]; |
| newIndices[i] = j; |
| } |
| } |
| } |
| double[] covNew = new double[cov.length]; |
| int idx1 = 0; |
| int idx2; |
| int ii; |
| int jj; |
| for (int i = 0; i < beta.length; i++) { |
| ii = newIndices[i]; |
| for (int j = 0; j <= i; j++, idx1++) { |
| jj = newIndices[j]; |
| if (ii > jj) { |
| idx2 = ii * (ii + 1) / 2 + jj; |
| } else { |
| idx2 = jj * (jj + 1) / 2 + ii; |
| } |
| covNew[idx1] = cov[idx2]; |
| } |
| } |
| return new RegressionResults( |
| betaNew, new double[][]{covNew}, true, this.nobs, rnk, |
| this.sumy, this.sumsqy, this.sserr, this.hasIntercept, false); |
| } |
| } |
| } |