# blob: 46e9273ec25b7d4f4a237e808f1d192679218456 [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# Trains a multinomial logistic regression model on a labelled CSV data set,
# optionally oversampling the training split with adasyn, and writes the
# resulting test accuracy.
#
# Arguments:
#   $1  CSV file with header, read as a frame; the last column is the label
#   $2  file holding the transformencode spec as a string scalar
#       (a single blank " " skips encoding and mode imputation)
#   $3  flag; if true the training split is passed through adasyn
#   $4  value of k handed to adasyn
#   $5  output path for the 1x1 matrix containing acc/100

# --- inputs ---
# NOTE(review): " " is listed twice in naStrings; kept as-is.
raw = read($1, data_type="frame", format="csv", header=TRUE,
  naStrings=["NA", "null", " ", "NaN", "nan", "", " ", "_nan_", "inf", "?", "NAN", "99999", "99999.00"]);
tfspec = read($2, data_type="scalar", value_type="string");
upsample = as.logical($3);

# --- separate label column (last) from the feature columns ---
nc = ncol(raw);
# labels are shifted by one; presumably maps 0-based classes to the
# 1-based labels expected by multiLogReg -- TODO confirm
Y = as.matrix(raw[, nc]) + 1;
F = raw[, 1:(nc-1)];

# --- feature encoding ---
if( tfspec == " " ) {
  # no spec given: take the frame as a numeric matrix directly
  # NOTE(review): no imputation happens on this path -- confirm intended
  X = as.matrix(F);
}
else {
  [X, meta] = transformencode(target=F, spec=tfspec);
  X = imputeByMode(X);
}

# --- normalization and train/test split ---
# NOTE(review): scaling runs on all rows before the split, so test rows
# contribute to the scaling statistics -- confirm this is acceptable.
[X, colCenter, colScale] = scale(X=X, scale=TRUE, center=TRUE);
[Xtrain, Xtest, Ytrain, Ytest] = split(X=X, Y=Y, f=0.7, seed=3);

# --- optional oversampling of the training data only ---
if( upsample ) {
  # oversampling all classes other than majority
  [Xtrain, Ytrain] = adasyn(X=Xtrain, Y=Ytrain, k=$4, seed=7);
}

# --- fit on the training split, evaluate on the held-out split ---
B = multiLogReg(X=Xtrain, Y=Ytrain, icpt=2);
[probs, predicted, acc] = multiLogRegPredict(X=Xtest, Y=Ytest, B=B);
print("accuracy: "+acc);

# accuracy divided by 100 (presumably percent -> fraction) as a 1x1 matrix
result = as.matrix(acc/100);
write(result, $5);