# blob: 88153d1af4ad0cebd3547a5d03615f148b168d12 [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# Load the raw data set (first script argument) as a CSV frame.
O = read($1, data_type = "frame", format = "csv")

# Feature columns span column 2 through the second-to-last column
# (presumably skipping a leading id column and the trailing label column --
# confirm against the input file).
lastFeatCol = ncol(O) - 1

# Training features: the first 45 rows of each 50-row class segment,
# stacked in class order (setosa, versicolor, virginica).
X = rbind(as.matrix(O[1:45, 2:lastFeatCol]),
          as.matrix(O[51:95, 2:lastFeatCol]),
          as.matrix(O[101:145, 2:lastFeatCol]))

# Training labels: 45 rows each of class ids 1, 2 and 3, aligned with X.
y = rbind(matrix(1, rows=45, cols=1),
          matrix(2, rows=45, cols=1),
          matrix(3, rows=45, cols=1))

# Learn the per-class parameters; the smoothing value is the second script argument.
[prior, means, covs, det] = gaussianClassifier(D=X, C=y, varSmoothing=$2);
# Held-out features: the last 5 rows of each 50-row class segment,
# stacked in class order (setosa, versicolor, virginica).
featEnd = ncol(O) - 1
Xp = rbind(as.matrix(O[46:50, 2:featEnd]),
           as.matrix(O[96:100, 2:featEnd]),
           as.matrix(O[146:150, 2:featEnd]))

# Expected labels for the held-out rows: 5 rows each of class ids 1, 2 and 3.
yp = rbind(matrix(1, rows=5, cols=1),
           matrix(2, rows=5, cols=1),
           matrix(3, rows=5, cols=1))
# Score every held-out sample against every class using the learned
# parameters, then pick the best-scoring class per sample.
#
# For sample x and class c the score is
#   log(prior_c) + ( -1/2 * log((2*pi)^nFeats * det_c)
#                    - 1/2 * (x - mean_c) %*% covs_c %*% t(x - mean_c) )
# covs[class] is used directly inside the quadratic form, so it is expected
# to hold the inverse covariance matrix of the class -- confirm against
# gaussianClassifier.
nSamples = nrow(Xp)
nClasses = max(yp)
nFeats = ncol(Xp)
results = matrix(0, rows=nSamples, cols=nClasses)
for (class in 1:nClasses)
{
  # Per-class terms do not depend on the sample, so compute them once here
  # instead of once per (class, sample) pair.
  classMean = means[class,]
  classCov = as.matrix(covs[class])
  logNorm = -1/2 * log((2*pi)^nFeats * det[class,])
  logPrior = log(prior[class,])
  for (i in 1:nSamples)
  {
    meanDiff = (Xp[i,] - classMean)
    # Same evaluation order as before: normalization term minus half the
    # quadratic form, then the log-prior added on the left.
    score = logNorm - 1/2 * (meanDiff %*% classCov %*% t(meanDiff))
    results[i, class] = logPrior + score
  }
}
# Predicted label = column index of the highest score in each row.
predicted = rowIndexMax(results)
# Emit the expected labels (third argument) and the predictions (fourth argument).
write(yp, $3);
write(predicted, $4);