blob: 5a0ba1a7fd30d6429ee9bf25179f5e761a6c3820 [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
args <- commandArgs(TRUE)
library("Matrix")
D <- as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
c <- as.matrix(readMM(paste(args[1], "Y.mtx", sep="")))
nClasses <- as.integer(max(c))
varSmoothing <- as.double(args[2])
nSamples <- nrow(D)
nFeatures <- ncol(D)
classInvCovariances <- list()
classMeans <- aggregate(D, by=list(c), FUN= mean)
classMeans <- classMeans[1:nFeatures+1]
classVars <- aggregate(D, by=list(c), FUN=var)
classVars[is.na(classVars)] <- 0
smoothedVar <- varSmoothing * max(classVars) * diag(nFeatures)
classCounts <- aggregate(c, by=list(c), FUN=length)
classCounts <- classCounts[2]
classPriors <- classCounts / nSamples
determinants <- matrix(0, nrow=nClasses, ncol=1)
for (i in 1:nClasses)
{
classMatrix <- subset(D, c==i)
covMatrix <- cov(x=classMatrix, use="all.obs")
covMatrix[is.na(covMatrix)] <- 0
covMatrix <- covMatrix + smoothedVar
#determinant <- det(covMatrix)
#determinants[i] <- det(covMatrix)
ev <- eigen(covMatrix)
vecs <- ev$vectors
vals <- ev$values
lam <- diag(vals^(-1))
determinants[i] <- prod(vals)
invCovMatrix <- vecs %*% lam %*% t(vecs)
invCovMatrix[is.na(invCovMatrix)] <- 0
classInvCovariances[[i]] <- invCovMatrix
}
#Calc accuracy
results <- matrix(0, nrow=nSamples, ncol=nClasses)
for (class in 1:nClasses)
{
for (i in 1:nSamples)
{
intermediate <- 0
meanDiff <- (D[i,] - classMeans[class,])
intermediate <- -1/2 * log((2*pi)^nFeatures * determinants[class,])
intermediate <- intermediate - 1/2 * (as.matrix(meanDiff) %*% as.matrix(classInvCovariances[[class]])
%*% t(as.matrix(meanDiff)))
intermediate <- log(classPriors[class,]) + intermediate
results[i, class] <- intermediate
}
}
pred <- max.col(results)
acc <- sum(pred == c) / nSamples * 100
print(paste("Training Accuracy (%): ", acc, sep=""))
classPriors <- data.matrix(classPriors)
classMeans <- data.matrix(classMeans)
#Cbind the inverse covariance matrices, to make them comparable in the unit tests
stackedInvCovs <- classInvCovariances[[1]]
for (i in 2:nClasses)
{
stackedInvCovs <- cbind(stackedInvCovs, classInvCovariances[[i]])
}
writeMM(as(classPriors, "CsparseMatrix"), paste(args[3], "priors", sep=""));
writeMM(as(classMeans, "CsparseMatrix"), paste(args[3], "means", sep=""));
writeMM(as(determinants, "CsparseMatrix"), paste(args[3], "determinants", sep=""));
writeMM(as(stackedInvCovs, "CsparseMatrix"), paste(args[3], "invcovs", sep=""));