blob: 6398eb15ce63ba72835324f6e7d4798faee21020 [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
checkR2 = function(Matrix[double] X, Matrix[double] y, Matrix[double] y_p,
Matrix[double] beta, Integer icpt) return (Double R2_ad)
{
n = nrow(X);
m = ncol(X);
m_ext = m;
if (icpt == 1|icpt == 2)
m_ext = m+1; #due to extra column ones
avg_tot = sum(y)/n;
ss_tot = sum(y^2);
ss_avg_tot = ss_tot - n*avg_tot^2;
y_res = y - y_p;
avg_res = sum(y - y_p)/n;
ss_res = sum((y - y_p)^2);
R2 = 1 - ss_res/ss_avg_tot;
dispersion = ifelse(n>m_ext, ss_res/(n-m_ext), NaN);
R2_ad = ifelse(n>m_ext, 1-dispersion/(ss_avg_tot/(n-1)), NaN);
}
# Get the dataset
M = 4000;
A = rand(rows=M, cols=500, seed=42);
y = rand(rows=M, cols=1, seed=43);
R = matrix(0, rows=1, cols=20);
K = floor(ncol(A) * 0.1);
nComb = 5; #10
for (i in 1:nComb) {
[newA1, Mout] = pca(X=A, K=K+i);
beta1 = lmDS(X=newA1, y=y, icpt=1, reg=0.0001, verbose=FALSE);
y_predict1 = lmPredict(X=newA1, B=beta1, icpt=1);
R2_ad1 = checkR2(newA1, y, y_predict1, beta1, 1);
R[,i] = R2_ad1;
}
R = sum(R);
write(R, $1, format="text");