blob: 2eafe13b94e9fb8e991b1c0aaee05a5674a1714c [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# ------------------------------------------
# Gaussian Mixture Model
# ------------------------------------------
# INPUT PARAMETERS:
# ---------------------------------------------------------------------------------------------
# NAME TYPE DEFAULT MEANING
# ---------------------------------------------------------------------------------------------
# X Double --- Matrix X
# n_components Integer 3 Number of n_components in the Gaussian mixture model
# model String "VVV" "VVV": unequal variance (full),each component has its own general covariance matrix
# "EEE": equal variance (tied), all components share the same general covariance matrix
# "VVI": spherical, unequal volume (diag), each component has its own diagonal covariance matrix
# "VII": spherical, equal volume (spherical), each component has its own single variance
# init_param String "kmeans" initialize weights with "kmeans" or "random"
# iterations Integer 100 Number of iterations
# reg_covar Double 1e-6 regularization parameter for covariance matrix
# tol Double 0.000001 tolerance value for convergence
# ---------------------------------------------------------------------------------------------
#Output(s)
# ---------------------------------------------------------------------------------------------
# NAME TYPE DEFAULT MEANING
# ---------------------------------------------------------------------------------------------
# weight Double --- A matrix whose [i,k]th entry is the probability that observation i in the test data belongs to the kth class
# labels Double --- Prediction matrix
# df Integer --- Number of estimated parameters
# bic Double --- Bayesian information criterion for best iteration
m_gmm = function(Matrix[Double] X, Integer n_components = 1, String model = "VVV", String init_params = "kmeans", Integer iter = 100, Double reg_covar = 1e-6, Double tol = 0.000001, Boolean verbose = FALSE )
return (Matrix[Double] weights, Matrix[Double] labels, Integer df, Double bic)
{
# sanity checks
if(model != "VVV" & model != "EEE" & model != "VVI" & model != "VII")
stop("model not supported, should be in VVV, EEE, VVI, VII");
[labels, weights, norm] = fit(X, n_components, model, init_params, iter, reg_covar, tol)
df = estimate_free_param(n_components, ncol(X), model)
bic = getBIC(nrow(X),norm,df)
}
initialize_param = function(Matrix[Double] X, Integer n_components, String init_params, String model, Double reg_covar, Double tol)
return (Matrix[Double] weight, Matrix[Double] mean, List[Unknown] sigma, List[Unknown] precision_chol)
{
# create responsibility matrix, resp[n_samples, n_components]
resp = matrix(0, nrow(X), n_components)
if(init_params == "kmeans")
{
[C, Y] = kmeans(X, n_components, 10, 10, tol, FALSE, 25)
resp = resp + t(seq(1,n_components))
resp = resp == Y
}
else if(init_params == "random")
{
resp = Rand(rows = nrow(X), cols=n_components)
resp = resp/rowSums(resp)
}
else stop("invalid parameter value, expected kmeans or random found "+init_params)
[weight, mean, sigma, precision_chol] = initialize(X, resp, n_components, model, reg_covar)
}
# Matrix/Vector Parameters
# input: (X[n_samples, n_features], resp[n_samples, n_components])
# output: (weight[n_samples, n_components], mean[n_components, n_features], sigma/prec_chol depends on model type)
initialize = function(Matrix[Double] X, Matrix[Double] resp, Integer n_components, String model, Double reg_covar)
return (Matrix[Double] weight, Matrix[Double] mean, List[Unknown] sigma, List[Unknown] precision_chol)
{
n = nrow(X)
[weight, mean, sigma] = estimate_gaussian_param(X, resp, n_components, model, reg_covar)
weight = weight/n
precision_chol = compute_precision_cholesky(sigma, model)
}
estimate_gaussian_param = function(Matrix[Double] X, Matrix[Double] resp, Integer n_components, String model, Double reg_covar)
return (Matrix[Double] weight, Matrix[Double] mean, List[Unknown] sigma)
{
l = list()
n = nrow(X)
# estimate Gaussian parameter
nk = colSums(resp) + 2.220446049250313e-15
mu = (t(resp) %*% X) / t(nk)
sigma = list()
if(model == "VVV")
sigma = covariances_VVV(X, resp, mu, nk, reg_covar)
else if(model == "EEE")
sigma = covariances_EEE(X, resp, mu, nk, reg_covar)
else if(model == "VVI")
sigma = covariances_VVI(X, resp, mu, nk, reg_covar)
else if (model == "VII")
sigma = covariances_VII(X, resp, mu, nk, reg_covar)
weight = nk
mean = mu # n_components * n_features
}
# Matrix/Vector Parameters/List
# input: (X[n_samples, n_features], resp[n_samples, n_components], mu[n_components, n_features], nk[1, n_components])
# output: (sigma a list of length = n_components where each item in list is a covariance matrix of (n_features * n_features) dimensions)
covariances_VVV = function(Matrix[Double] X, Matrix[Double] resp, Matrix[Double] mu, Matrix[Double] nk, Double reg_covar)
return(List[Unknown] sigma)
{
sigma = list()
for(k in 1:nrow(mu))
{
diff = X - mu[k,]
cov = (t(diff * resp[, k]) %*% diff) / as.scalar(nk[1,k])
cov = cov + diag(matrix(reg_covar, ncol(cov), 1))
sigma = append(sigma, cov)
}
}
# Matrix/Vector Parameters/List
# input: (X[n_samples, n_features], resp[n_samples, n_components], mu[n_components, n_features], nk[1, n_components])
# output: (sigma a list of length = 1 where item in list is a covariance matrix of (n_features * n_features) dimensions)
covariances_EEE = function(Matrix[Double] X, Matrix[Double] resp, Matrix[Double] mu, Matrix[Double] nk, Double reg_covar)
return(List[Unknown] sigma)
{
sigma = list()
avgX2 = t(X) %*% X
avgMean = (t(mu) * nk) %*% mu
cov = avgX2 - avgMean
cov = cov / sum(nk)
cov = cov + diag(matrix(reg_covar, ncol(cov), 1))
sigma = append(sigma, cov)
}
# Matrix/Vector Parameters/List
# input: (X[n_samples, n_features], resp[n_samples, n_components], mu[n_components, n_features], nk[1, n_components])
# output: (sigma a list of length = 1 where item in list is a covariance matrix of (n_components * n_features) dimensions)
covariances_VVI = function(Matrix[Double] X, Matrix[Double] resp, Matrix[Double] mu, Matrix[Double] nk, Double reg_covar)
return(List[Unknown] sigma)
{
sigma = list()
avgX2 = (t(resp) %*% (X*X)) / t(nk)
avgMean = mu ^ 2
avgMean2 = mu * (t(resp) %*% X) / t(nk)
cov = avgX2 - 2 * avgMean + avgMean2 + reg_covar
sigma = append(sigma, cov)
}
# Matrix/Vector Parameters/List
# input: (X[n_samples, n_features], resp[n_samples, n_components], mu[n_components, n_features], nk[1, n_components])
# output: (sigma a list of length = 1 where item in list is a variance value for each component (1* n_components) dimensions)
covariances_VII = function(Matrix[Double] X, Matrix[Double] resp, Matrix[Double] mu, Matrix[Double] nk, Double reg_covar)
return(List[Unknown] sigma)
{
sigma = list()
avgX2 = (t(resp) %*% (X*X)) / t(nk)
avgMean = mu ^ 2
avgMean2 = mu * (t(resp) %*% X) / t(nk)
cov = avgX2 - 2 * avgMean + avgMean2 + reg_covar
sigma = list(rowMeans(cov))
}
compute_precision_cholesky = function(List[Unknown] sigma, String model)
return (List[Unknown] precision_chol)
{
precision_chol = list()
if(model == "VVV") {
comp = length(sigma)
for(k in 1:length(sigma)) {
cov = as.matrix(sigma[k])
isSPD = checkSPD(cov)
if(isSPD) {
cov_chol = cholesky(cov)
pre_chol = t(inv(cov_chol))
precision_chol = append(precision_chol, pre_chol)
} else
stop("Fitting the mixture model failed because some components have ill-defined empirical covariance (i.e., singleton matrix or non-symmetric )."+
"\nTry to decrease the number of components, or increase reg_covar")
}
}
else if(model == "EEE") {
cov = as.matrix(sigma[1])
isSPD = checkSPD(cov)
if(isSPD) {
cov_chol = cholesky(cov)
pre_chol = t(inv(cov_chol))
precision_chol = append(precision_chol, pre_chol)
} else
stop("Fitting the mixture model failed because some components have ill-defined empirical covariance (i.e., singleton matrix or non-symmetric)."+
"\nTry to decrease the number of components, or increase reg_covar")
}
else {
cov = as.matrix(sigma[1])
if(sum(cov <= 0) > 0)
stop("Fitting the mixture model failed because some components have ill-defined empirical covariance (i.e., singleton matrix or non-symmetric)."+
"\nTry to decrease the number of components, or increase reg_covar")
else {
precision_chol = append(precision_chol, 1.0/sqrt(cov))
}
}
}
# Expectation step
e_step = function(Matrix[Double] X, Matrix[Double] w, Matrix[Double] mu, List[Unknown] precisions_cholesky, String model)
return(Double norm, Matrix[Double] log_resp){
weighted_log_prob = estimate_weighted_log_prob(X, w, mu, precisions_cholesky, model)
log_prob_norm = logsumexp(weighted_log_prob)
log_resp = weighted_log_prob - log_prob_norm
norm = mean(log_prob_norm)
}
# maximization Step
m_step = function(Matrix[Double] X, Matrix[Double] log_resp, Integer n_components, String model, Double reg_covar)
return (Matrix[Double] weight, Matrix[Double] mean, List[Unknown] sigma, List[Unknown] precision_chol) {
n = nrow(X)
[weight, mean, sigma] = estimate_gaussian_param(X, exp(log_resp), n_components, model, reg_covar)
weight = weight/n
precision_chol = compute_precision_cholesky(sigma, model)
}
estimate_weighted_log_prob = function(Matrix[Double] X, Matrix[Double] w, Matrix[Double] mu, List[Unknown] precisions_cholesky, String model)
return (Matrix[Double] weight_log_pro)
{
weight_log_pro = estimate_log_prob(X, mu, precisions_cholesky, model) + estimate_log_weights(w)
}
estimate_log_weights = function(Matrix[Double] w)
return (Matrix[Double] log_weight)
{
log_weight = log(w)
}
estimate_log_prob = function(Matrix[Double] X, Matrix[Double] mu, List[Unknown] precisions_cholesky, String model)
return (Matrix[Double] log_prob)
{
log_prob = estimate_log_gaussian_prob(
X, mu, precisions_cholesky, model)
}
compute_log_det_cholesky = function(List[Unknown] mat_chol, String model, Integer d)
return(Matrix[Double] log_det_cholesky)
{
comp = length(mat_chol)
if(model == "VVV")
{
log_det_chol = matrix(0, 1, comp)
for(k in 1:comp)
{
mat = as.matrix(mat_chol[k])
log_det = sum(log(diag(t(mat)))) # have to take the log of diag elements only
log_det_chol[1,k] = log_det
}
}
else if(model == "EEE")
{
mat = as.matrix(mat_chol[1])
log_det_chol = as.matrix(sum(log(diag(mat))))
}
else if(model == "VVI")
{
mat = as.matrix(mat_chol[1])
log_det_chol = t(rowSums(log(mat)))
}
else if (model == "VII")
{
mat = as.matrix(mat_chol[1])
log_det_chol = t(d * log(mat))
}
log_det_cholesky = log_det_chol
}
estimate_log_gaussian_prob = function(Matrix[Double] X, Matrix[Double] mu, List[Unknown] prec_chol, String model)
return(Matrix[Double] es_log_prob ) # nrow(X) * n_components
{
n = nrow(X)
d = ncol(X)
n_components = nrow(mu)
log_det = compute_log_det_cholesky(prec_chol, model, d)
if(model == "VVV")
{
log_prob = matrix(0, n, n_components)
for(k in 1:n_components)
{
prec = as.matrix(prec_chol[k])
y = X %*% prec - mu[k,] %*% prec # changing here t intro: y = X %*% prec - mu[k,] %*% prec
log_prob[, k] = rowSums(y*y)
}
}
else if(model == "EEE")
{
log_prob = matrix(0, n, n_components)
prec = as.matrix(prec_chol[1])
for(k in 1:n_components) {
y = X %*% prec - mu[k,] %*% prec
log_prob[, k] = rowSums(y*y) # TODO replace y*y with squared built-in
}
}
else if(model == "VVI")
{
prec = as.matrix(prec_chol[1])
precisions = prec^2
bc_matrix = matrix(1,nrow(X), nrow(mu))
log_prob = (bc_matrix*t(rowSums(mu^2 * precisions)) -
2. * (X %*% t(mu * precisions)) +
X^2 %*% t(precisions))
}
else if (model == "VII")
{
prec = as.matrix(prec_chol[1])
precisions = prec^ 2
bc_matrix = matrix(1,nrow(X), nrow(mu))
log_prob = (bc_matrix * t(rowSums(mu^2) * precisions) -
2 * X %*% t(mu * precisions) +
rowSums(X*X) %*% t(precisions) ) # TODO replace rowSums(X*X) with squared rowNorm() built-in
}
if(ncol(log_det) == 1)
log_det = matrix(1, 1, ncol(log_prob)) * log_det
es_log_prob = -.5 * (d * log(2 * pi) + log_prob) + log_det
}
logsumexp = function(Matrix[Double] M) # TODO replace with a built-in function logsumexp
return(Matrix[Double] soft)
{
max = max(M)
ds = M - max
sumOfexp = rowSums(exp(ds))
soft = max + log(sumOfexp)
}
# compute the number of estimated parameters
estimate_free_param = function(Integer n_components, Integer n_features, String model)
return (Integer n_parameters)
{
if(model == "VVV")
cov_param = n_components * n_features * (n_features + 1) / 2
else if(model == "EEE")
cov_param = n_features * (n_features + 1) / 2
else if (model == "VVI")
cov_param = n_components * n_features
else if (model == "VII")
cov_param = n_components
else
stop("invalid model expecting any of [VVV,EEE,VVI,VII], found "+model)
mean_param = n_features * n_components
n_parameters = as.integer( cov_param + mean_param + n_components - 1 )
}
fit = function(Matrix[Double] X, Integer n_components, String model, String init_params, Integer iter , Double reg_covar, Double tol)
return (Matrix[Double] label, Matrix[Double] predict_prob, Double log_prob_norm)
{
lower_bound = 0
converged = FALSE
n = nrow(X)
[weight, mean, sigma, precision_chol] = initialize_param(X, n_components,init_params, model, reg_covar, tol)
i = 1
while(i <= iter & !converged) {
prev_lower_bound = lower_bound
[log_prob_norm, log_resp] = e_step(X,weight, mean, precision_chol, model)
[weight, mean, sigma, precision_chol] = m_step(X, log_resp, n_components, model, reg_covar)
lower_bound = log_prob_norm
change = lower_bound - prev_lower_bound
if(abs(change) < tol)
converged = TRUE
i = i+1
}
[log_prob_norm, log_resp] = e_step(X,weight, mean, precision_chol, model)
label = rowIndexMax(log_resp)
predict_prob = exp(log_resp)
}
getBIC = function(Integer n, Double norm, Integer df)
return(Double bic)
{
bic = -2 * norm * n + df * log(n)
}
# check if covariance matrix is symmetric and positive definite
checkSPD = function(Matrix[Double] A)
return(Boolean isSPD)
{
# abs(a - t(a)) <= (absoluteTolerance + relativeTolerance * abs(b))
sym = abs(A - t(A)) <= (1e-10 * abs(t(A)))
if(sum(sym == 0) == 0)
{
[eval, evec] = eigen(A);
if(sum(eval < 0) == 0) #check positive definite
isSPD = TRUE
else isSPD = FALSE
}
else isSPD = FALSE
}