| # Licensed to the Apache Software Foundation (ASF) under one or more |
| # contributor license agreements. See the NOTICE file distributed with |
| # this work for additional information regarding copyright ownership. |
| # The ASF licenses this file to You under the Apache License, Version 2.0 |
| # (the "License"); you may not use this file except in compliance with |
| # the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, software |
| # distributed under the License is distributed on an "AS IS" BASIS, |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| # See the License for the specific language governing permissions and |
| # limitations under the License. |
| # |
| #------------------------------------------------------------------------------ |
| # R source file to validate OLS multiple regression tests in |
| # org.apache.commons.math.stat.regression.OLSMultipleLinearRegressionTest |
| # |
| # To run the test, install R, put this file and testFunctions |
| # into the same directory, launch R from this directory and then enter |
| # source("<name-of-this-file>") |
| # |
| #------------------------------------------------------------------------------ |
| tol <- 1E-8 # error tolerance for tests |
| #------------------------------------------------------------------------------ |
| # Function definitions |
| |
| source("testFunctions") # utility test functions |
| options(digits=16) # override number of digits displayed |
| |
| # function to verify OLS computations |
| |
| verifyRegression <- function(model, expectedBeta, expectedResiduals, |
| expectedErrors, modelName) { |
| betaHat <- as.vector(coefficients(model)) |
| residuals <- as.vector(residuals(model)) |
| errors <- as.vector(as.matrix(coefficients(summary(model)))[,2]) |
| output <- c("Parameter test dataset = ", modelName) |
| if (assertEquals(expectedBeta,betaHat,tol,"Parameters")) { |
| displayPadded(output, SUCCEEDED, WIDTH) |
| } else { |
| displayPadded(output, FAILED, WIDTH) |
| } |
| output <- c("Residuals test dataset = ", modelName) |
| if (assertEquals(expectedResiduals,residuals,tol,"Residuals")) { |
| displayPadded(output, SUCCEEDED, WIDTH) |
| } else { |
| displayPadded(output, FAILED, WIDTH) |
| } |
| output <- c("Errors test dataset = ", modelName) |
| if (assertEquals(expectedErrors,errors,tol,"Errors")) { |
| displayPadded(output, SUCCEEDED, WIDTH) |
| } else { |
| displayPadded(output, FAILED, WIDTH) |
| } |
| } |
| |
| #-------------------------------------------------------------------------- |
| cat("Multiple regression OLS test cases\n") |
| |
| # Perfect fit |
| x1 <- c(0,2,0,0,0,0) |
| x2 <- c(0,0,3,0,0,0) |
| x3 <- c(0,0,0,4,0,0) |
| x4 <- c(0,0,0,0,5,0) |
| x5 <- c(0,0,0,0,0,6) |
| y <- c(11, 12, 13, 14, 15, 16) |
| model <- lm(y ~ x1 + x2 + x3 + x4 + x5) |
| expectedBeta <- c(11.0,0.5,0.666666666666667,0.75,0.8,0.8333333333333333) |
| expectedResiduals <- c(0,0,0,0,0,0) |
| expectedErrors <- c(NaN,NaN,NaN,NaN,NaN,NaN) |
| verifyRegression(model, expectedBeta, expectedResiduals, expectedErrors, |
| "perfect fit") |
| |
| # Longly |
| # |
| # Data Source: J. Longley (1967) "An Appraisal of Least Squares Programs for the |
| # Electronic Computer from the Point of View of the User", |
| # Journal of the American Statistical Association, |
| # vol. 62. September, pp. 819-841. |
| # |
| # Certified values (and data) are from NIST: |
| # http://www.itl.nist.gov/div898/strd/lls/data/LINKS/DATA/Longley.dat |
| # |
| design <- matrix(c(60323,83.0,234289,2356,1590,107608,1947, |
| 61122,88.5,259426,2325,1456,108632,1948, |
| 60171,88.2,258054,3682,1616,109773,1949, |
| 61187,89.5,284599,3351,1650,110929,1950, |
| 63221,96.2,328975,2099,3099,112075,1951, |
| 63639,98.1,346999,1932,3594,113270,1952, |
| 64989,99.0,365385,1870,3547,115094,1953, |
| 63761,100.0,363112,3578,3350,116219,1954, |
| 66019,101.2,397469,2904,3048,117388,1955, |
| 67857,104.6,419180,2822,2857,118734,1956, |
| 68169,108.4,442769,2936,2798,120445,1957, |
| 66513,110.8,444546,4681,2637,121950,1958, |
| 68655,112.6,482704,3813,2552,123366,1959, |
| 69564,114.2,502601,3931,2514,125368,1960, |
| 69331,115.7,518173,4806,2572,127852,1961, |
| 70551,116.9,554894,4007,2827,130081,1962), |
| nrow = 16, ncol = 7, byrow = TRUE) |
| y <- design[,1] |
| x1 <- design[,2] |
| x2 <- design[,3] |
| x3 <- design[,4] |
| x4 <- design[,5] |
| x5 <- design[,6] |
| x6 <- design[,7] |
| model <- lm(y ~ x1 + x2 + x3 + x4 + x5 + x6) |
| |
| estimates <- matrix(c(-3482258.63459582,890420.383607373, |
| 15.0618722713733,84.9149257747669, |
| -0.358191792925910E-01,0.334910077722432E-01, |
| -2.02022980381683,0.488399681651699, |
| -1.03322686717359,0.214274163161675, |
| -0.511041056535807E-01,0.226073200069370, |
| 1829.15146461355,455.478499142212), |
| nrow = 7, ncol = 2, byrow = TRUE) |
| |
| expectedBeta <- estimates[,1] |
| expectedErrors <- estimates[,2] |
| expectedResiduals <- c( 267.340029759711,-94.0139423988359,46.28716775752924, |
| -410.114621930906,309.7145907602313,-249.3112153297231,-164.0489563956039, |
| -13.18035686637081,14.30477260005235,455.394094551857,-17.26892711483297, |
| -39.0550425226967,-155.5499735953195,-85.6713080421283,341.9315139607727, |
| -206.7578251937366) |
| |
| verifyRegression(model, expectedBeta, expectedResiduals, expectedErrors, |
| "Longly") |
| |
| # Swiss Fertility (R dataset named "swiss") |
| |
| design <- matrix(c(80.2,17.0,15,12,9.96, |
| 83.1,45.1,6,9,84.84, |
| 92.5,39.7,5,5,93.40, |
| 85.8,36.5,12,7,33.77, |
| 76.9,43.5,17,15,5.16, |
| 76.1,35.3,9,7,90.57, |
| 83.8,70.2,16,7,92.85, |
| 92.4,67.8,14,8,97.16, |
| 82.4,53.3,12,7,97.67, |
| 82.9,45.2,16,13,91.38, |
| 87.1,64.5,14,6,98.61, |
| 64.1,62.0,21,12,8.52, |
| 66.9,67.5,14,7,2.27, |
| 68.9,60.7,19,12,4.43, |
| 61.7,69.3,22,5,2.82, |
| 68.3,72.6,18,2,24.20, |
| 71.7,34.0,17,8,3.30, |
| 55.7,19.4,26,28,12.11, |
| 54.3,15.2,31,20,2.15, |
| 65.1,73.0,19,9,2.84, |
| 65.5,59.8,22,10,5.23, |
| 65.0,55.1,14,3,4.52, |
| 56.6,50.9,22,12,15.14, |
| 57.4,54.1,20,6,4.20, |
| 72.5,71.2,12,1,2.40, |
| 74.2,58.1,14,8,5.23, |
| 72.0,63.5,6,3,2.56, |
| 60.5,60.8,16,10,7.72, |
| 58.3,26.8,25,19,18.46, |
| 65.4,49.5,15,8,6.10, |
| 75.5,85.9,3,2,99.71, |
| 69.3,84.9,7,6,99.68, |
| 77.3,89.7,5,2,100.00, |
| 70.5,78.2,12,6,98.96, |
| 79.4,64.9,7,3,98.22, |
| 65.0,75.9,9,9,99.06, |
| 92.2,84.6,3,3,99.46, |
| 79.3,63.1,13,13,96.83, |
| 70.4,38.4,26,12,5.62, |
| 65.7,7.7,29,11,13.79, |
| 72.7,16.7,22,13,11.22, |
| 64.4,17.6,35,32,16.92, |
| 77.6,37.6,15,7,4.97, |
| 67.6,18.7,25,7,8.65, |
| 35.0,1.2,37,53,42.34, |
| 44.7,46.6,16,29,50.43, |
| 42.8,27.7,22,29,58.33), |
| nrow = 47, ncol = 5, byrow = TRUE) |
| |
| y <- design[,1] |
| x1 <- design[,2] |
| x2 <- design[,3] |
| x3 <- design[,4] |
| x4 <- design[,5] |
| |
| model <- lm(y ~ x1 + x2 + x3 + x4) |
| |
| estimates <- matrix(c(91.05542390271397,6.94881329475087, |
| -0.22064551045715,0.07360008972340, |
| -0.26058239824328,0.27410957467466, |
| -0.96161238456030,0.19454551679325, |
| 0.12441843147162,0.03726654773803), |
| nrow = 5, ncol = 2, byrow = TRUE) |
| |
| expectedBeta <- estimates[,1] |
| expectedErrors <- estimates[,2] |
| |
| expectedResiduals <- c(7.1044267859730512,1.6580347433531366, |
| 4.6944952770029644,8.4548022690166160,13.6547432343186212, |
| -9.3586864458500774,7.5822446330520386,15.5568995563859289, |
| 0.8113090736598980,7.1186762732484308,7.4251378771228724, |
| 2.6761316873234109,0.8351584810309354,7.1769991119615177, |
| -3.8746753206299553,-3.1337779476387251,-0.1412575244091504, |
| 1.1186809170469780,-6.3588097346816594,3.4039270429434074, |
| 2.3374058329820175,-7.9272368576900503,-7.8361010968497959, |
| -11.2597369269357070,0.9445333697827101,6.6544245101380328, |
| -0.9146136301118665,-4.3152449403848570,-4.3536932047009183, |
| -3.8907885169304661,-6.3027643926302188,-7.8308982189289091, |
| -3.1792280015332750,-6.7167298771158226,-4.8469946718041754, |
| -10.6335664353633685,11.1031134362036958,6.0084032641811733, |
| 5.4326230830188482,-7.2375578629692230,2.1671550814448222, |
| 15.0147574652763112,4.8625103516321015,-7.1597256413907706, |
| -0.4515205619767598,-10.2916870903837587,-15.7812984571900063) |
| |
| verifyRegression(model, expectedBeta, expectedResiduals, expectedErrors, |
| "Swiss Fertility") |
| |
| |
| displayDashes(WIDTH) |