scripts/builtin/gmm.dml - systemds - Git at Google

 #-------------------------------------------------------------
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #
 #-------------------------------------------------------------

 # ------------------------------------------
 # Gaussian Mixture Model
 # ------------------------------------------

 # INPUT PARAMETERS:
 # ---------------------------------------------------------------------------------------------
 # NAME            TYPE    DEFAULT     MEANING
 # ---------------------------------------------------------------------------------------------
 # X               Double   ---       Matrix X
 # n_components    Integer  3         Number of n_components in the Gaussian mixture model
 # model           String   "VVV"     "VVV": unequal variance (full),each component has its own general covariance matrix
 #                                    "EEE": equal variance (tied), all components share the same general covariance matrix
 #                                    "VVI": spherical, unequal volume (diag), each component has its own diagonal covariance matrix
 #                                    "VII": spherical, equal volume (spherical), each component has its own single variance
 # init_param      String  "kmeans"   initialize weights with "kmeans" or "random"
 # iterations      Integer  100       Number of iterations
 # reg_covar       Double   1e-6      regularization parameter for covariance matrix
 # tol             Double   0.000001  tolerance value for convergence
 # ---------------------------------------------------------------------------------------------


 #Output(s)
 # ---------------------------------------------------------------------------------------------
 # NAME            TYPE    DEFAULT     MEANING
 # ---------------------------------------------------------------------------------------------
 # weight          Double   ---      A matrix whose [i,k]th entry is the probability that observation i in the test data belongs to the kth class
 # labels          Double   ---      Prediction matrix
 # df              Integer  ---      Number of estimated parameters
 # bic             Double   ---      Bayesian information criterion for best iteration


 m_gmm = function(Matrix[Double] X, Integer n_components = 1, String model = "VVV", String init_params = "kmeans", Integer iter = 100, Double reg_covar = 1e-6, Double tol = 0.000001, Boolean verbose = FALSE )
 return (Matrix[Double] weights, Matrix[Double] labels, Integer df, Double bic)
 {

   # sanity checks
   if(model != "VVV" & model != "EEE" & model != "VVI" & model != "VII")
     stop("model not supported, should be in VVV, EEE, VVI, VII");

   [labels, weights, norm] = fit(X, n_components, model, init_params, iter, reg_covar, tol)
   df = estimate_free_param(n_components, ncol(X), model)
   bic = getBIC(nrow(X),norm,df)
 }

 initialize_param = function(Matrix[Double] X, Integer n_components, String init_params, String model, Double reg_covar, Double tol)
 return (Matrix[Double] weight, Matrix[Double] mean, List[Unknown] sigma, List[Unknown] precision_chol)
 {
   # create responsibility matrix, resp[n_samples, n_components]
   resp = matrix(0, nrow(X), n_components)
   if(init_params == "kmeans")
   {
     [C, Y] = kmeans(X, n_components, 10, 10, tol, FALSE, 25)
     resp = resp + t(seq(1,n_components))
     resp = resp == Y
   }
   else if(init_params == "random")
   {
     resp = Rand(rows = nrow(X), cols=n_components)
     resp = resp/rowSums(resp)
   }
   else stop("invalid parameter value, expected kmeans or random found "+init_params)

   [weight, mean, sigma, precision_chol] = initialize(X, resp, n_components, model, reg_covar)
 }


 # Matrix/Vector Parameters
 # input: (X[n_samples, n_features], resp[n_samples, n_components])
 # output: (weight[n_samples, n_components], mean[n_components, n_features], sigma/prec_chol depends on model type)
 initialize = function(Matrix[Double] X, Matrix[Double] resp, Integer n_components, String model,  Double reg_covar)
 return (Matrix[Double] weight, Matrix[Double] mean, List[Unknown] sigma, List[Unknown] precision_chol)
 {
   n =  nrow(X)
   [weight, mean, sigma] = estimate_gaussian_param(X, resp, n_components, model, reg_covar)
   weight = weight/n
   precision_chol = compute_precision_cholesky(sigma, model)
 }

 estimate_gaussian_param = function(Matrix[Double] X, Matrix[Double] resp, Integer n_components, String model, Double reg_covar)
 return (Matrix[Double] weight, Matrix[Double] mean, List[Unknown] sigma)
 {
   l = list()
   n =  nrow(X)
   # estimate Gaussian parameter
   nk = colSums(resp) + 2.220446049250313e-15
   mu = (t(resp) %*% X) / t(nk)
   sigma = list()
   if(model == "VVV")
     sigma = covariances_VVV(X, resp, mu, nk, reg_covar)
   else if(model == "EEE")
     sigma = covariances_EEE(X, resp, mu, nk, reg_covar)
   else if(model ==  "VVI")
     sigma = covariances_VVI(X, resp, mu, nk, reg_covar)
   else if (model == "VII")
     sigma = covariances_VII(X, resp, mu, nk, reg_covar)

   weight = nk
   mean = mu # n_components * n_features
 }

 # Matrix/Vector Parameters/List
 # input: (X[n_samples, n_features], resp[n_samples, n_components],  mu[n_components, n_features], nk[1, n_components])
 # output: (sigma a list of length = n_components where each item in list is a covariance matrix of (n_features * n_features) dimensions)
 covariances_VVV = function(Matrix[Double] X, Matrix[Double] resp, Matrix[Double] mu, Matrix[Double] nk, Double reg_covar)
 return(List[Unknown] sigma)
 {
   sigma = list()
   for(k in 1:nrow(mu))
   {
     diff = X - mu[k,]
     cov = (t(diff * resp[, k]) %*% diff) / as.scalar(nk[1,k])
     cov = cov + diag(matrix(reg_covar, ncol(cov), 1))
     sigma = append(sigma, cov)
   }
 }

 # Matrix/Vector Parameters/List
 # input: (X[n_samples, n_features], resp[n_samples, n_components],  mu[n_components, n_features], nk[1, n_components])
 # output: (sigma a list of length = 1 where  item in list is a covariance matrix of (n_features * n_features) dimensions)
 covariances_EEE = function(Matrix[Double] X, Matrix[Double] resp, Matrix[Double] mu, Matrix[Double] nk, Double reg_covar)
 return(List[Unknown] sigma)
 {
   sigma = list()
   avgX2 = t(X) %*% X
   avgMean = (t(mu) * nk) %*% mu
   cov = avgX2 - avgMean
   cov = cov / sum(nk)
   cov = cov + diag(matrix(reg_covar, ncol(cov), 1))
   sigma = append(sigma, cov)
 }

 # Matrix/Vector Parameters/List
 # input: (X[n_samples, n_features], resp[n_samples, n_components],  mu[n_components, n_features], nk[1, n_components])
 # output: (sigma a list of length = 1 where item in list is a covariance matrix of (n_components * n_features) dimensions)
 covariances_VVI = function(Matrix[Double] X, Matrix[Double] resp, Matrix[Double] mu, Matrix[Double] nk, Double reg_covar)
 return(List[Unknown] sigma)
 {
   sigma = list()
   avgX2 = (t(resp) %*% (X*X)) / t(nk)
   avgMean = mu ^ 2
   avgMean2 = mu * (t(resp) %*% X) / t(nk)
   cov = avgX2 - 2 * avgMean + avgMean2 + reg_covar
   sigma = append(sigma, cov)
 }

 # Matrix/Vector Parameters/List
 # input: (X[n_samples, n_features], resp[n_samples, n_components],  mu[n_components, n_features], nk[1, n_components])
 # output: (sigma a list of length = 1 where item in list is a variance value for each component (1* n_components) dimensions)
 covariances_VII = function(Matrix[Double] X, Matrix[Double] resp, Matrix[Double] mu, Matrix[Double] nk, Double reg_covar)
 return(List[Unknown] sigma)
 {
   sigma = list()
   avgX2 = (t(resp) %*% (X*X)) / t(nk)
   avgMean = mu ^ 2
   avgMean2 = mu * (t(resp) %*% X) / t(nk)
   cov = avgX2 - 2 * avgMean + avgMean2 + reg_covar
   sigma = list(rowMeans(cov))
 }

 compute_precision_cholesky = function(List[Unknown] sigma, String model)
 return (List[Unknown] precision_chol)
 {
   precision_chol = list()

   if(model == "VVV") {
     comp = length(sigma)
     for(k in 1:length(sigma)) {
       cov = as.matrix(sigma[k])
       isSPD = checkSPD(cov)
       if(isSPD) {
         cov_chol = cholesky(cov)
         pre_chol = t(inv(cov_chol))
         precision_chol = append(precision_chol, pre_chol)
       } else
         stop("Fitting the mixture model failed because some components have ill-defined empirical covariance (i.e., singleton matrix or non-symmetric )."+
         "\nTry to decrease the number of components, or increase reg_covar")
     }
   }
   else if(model == "EEE") {
     cov = as.matrix(sigma[1])
     isSPD = checkSPD(cov)
     if(isSPD) {
       cov_chol = cholesky(cov)
       pre_chol = t(inv(cov_chol))
       precision_chol = append(precision_chol, pre_chol)
     } else
       stop("Fitting the mixture model failed because some components have ill-defined empirical covariance (i.e., singleton matrix or non-symmetric)."+
       "\nTry to decrease the number of components, or increase reg_covar")
   }
   else {
     cov = as.matrix(sigma[1])
     if(sum(cov <= 0) > 0)
       stop("Fitting the mixture model failed because some components have ill-defined empirical covariance (i.e., singleton matrix or non-symmetric)."+
       "\nTry to decrease the number of components, or increase reg_covar")
     else {
       precision_chol = append(precision_chol, 1.0/sqrt(cov))
     }
   }
 }

 # Expectation step
 e_step = function(Matrix[Double] X, Matrix[Double] w, Matrix[Double] mu, List[Unknown] precisions_cholesky, String model)
 return(Double norm, Matrix[Double] log_resp){
   weighted_log_prob = estimate_weighted_log_prob(X, w, mu, precisions_cholesky, model)
   log_prob_norm = logsumexp(weighted_log_prob)
   log_resp = weighted_log_prob - log_prob_norm
   norm = mean(log_prob_norm)
 }

 # maximization Step
 m_step = function(Matrix[Double] X, Matrix[Double] log_resp, Integer n_components, String model, Double reg_covar)
 return (Matrix[Double] weight, Matrix[Double] mean, List[Unknown] sigma, List[Unknown] precision_chol) {
   n =  nrow(X)
   [weight, mean, sigma] = estimate_gaussian_param(X, exp(log_resp), n_components, model, reg_covar)
   weight = weight/n
   precision_chol = compute_precision_cholesky(sigma, model)
 }

 estimate_weighted_log_prob = function(Matrix[Double] X, Matrix[Double] w, Matrix[Double] mu, List[Unknown] precisions_cholesky, String model)
 return (Matrix[Double] weight_log_pro)
 {
   weight_log_pro = estimate_log_prob(X, mu, precisions_cholesky, model) + estimate_log_weights(w)
 }

 estimate_log_weights = function(Matrix[Double] w)
 return (Matrix[Double] log_weight)
 {
   log_weight = log(w)
 }

 estimate_log_prob = function(Matrix[Double] X, Matrix[Double] mu, List[Unknown] precisions_cholesky, String model)
 return (Matrix[Double] log_prob)
 {
   log_prob = estimate_log_gaussian_prob(
             X, mu, precisions_cholesky, model)
 }

 compute_log_det_cholesky = function(List[Unknown] mat_chol, String model, Integer d)
 return(Matrix[Double] log_det_cholesky)
 {
   comp = length(mat_chol)

   if(model == "VVV")
   {
     log_det_chol = matrix(0, 1, comp)
     for(k in 1:comp)
     {
       mat = as.matrix(mat_chol[k])
       log_det = sum(log(diag(t(mat))))   # have to take the log of diag elements only
       log_det_chol[1,k] = log_det
     }
   }
   else if(model == "EEE")
   {
     mat = as.matrix(mat_chol[1])
     log_det_chol = as.matrix(sum(log(diag(mat))))
   }
   else if(model ==  "VVI")
   {
     mat = as.matrix(mat_chol[1])
     log_det_chol = t(rowSums(log(mat)))
   }
   else if (model == "VII")
   {
     mat = as.matrix(mat_chol[1])
     log_det_chol = t(d * log(mat))
   }
   log_det_cholesky = log_det_chol
 }

 estimate_log_gaussian_prob = function(Matrix[Double] X, Matrix[Double] mu, List[Unknown] prec_chol, String model)
 return(Matrix[Double] es_log_prob ) # nrow(X) * n_components
 {
   n = nrow(X)
   d = ncol(X)
   n_components = nrow(mu)

   log_det = compute_log_det_cholesky(prec_chol, model, d)
   if(model == "VVV")
   {
     log_prob = matrix(0, n, n_components)
     for(k in 1:n_components)
     {
       prec = as.matrix(prec_chol[k])
       y = X %*% prec - mu[k,] %*% prec  # changing here t intro:  y = X %*% prec - mu[k,] %*% prec
       log_prob[, k] = rowSums(y*y)
     }
   }
   else if(model == "EEE")
   {
     log_prob = matrix(0, n, n_components)
     prec = as.matrix(prec_chol[1])
     for(k in 1:n_components) {
       y = X %*% prec - mu[k,] %*% prec
       log_prob[, k] = rowSums(y*y) # TODO replace y*y with squared built-in
     }
   }
   else if(model ==  "VVI")
   {
     prec = as.matrix(prec_chol[1])
     precisions = prec^2
     bc_matrix = matrix(1,nrow(X), nrow(mu))
     log_prob = (bc_matrix*t(rowSums(mu^2 * precisions)) -
                     2. * (X %*% t(mu * precisions)) +
                     X^2 %*% t(precisions))
   }
   else if (model == "VII")
   {
     prec = as.matrix(prec_chol[1])
     precisions = prec^ 2
     bc_matrix = matrix(1,nrow(X), nrow(mu))
     log_prob = (bc_matrix * t(rowSums(mu^2) * precisions) -
                     2 * X %*% t(mu * precisions) +
                     rowSums(X*X) %*% t(precisions) ) # TODO replace rowSums(X*X) with squared rowNorm() built-in
   }
   if(ncol(log_det) == 1)
     log_det = matrix(1, 1, ncol(log_prob)) * log_det
   es_log_prob = -.5 * (d * log(2 * pi) + log_prob) + log_det
 }

 logsumexp = function(Matrix[Double] M) # TODO replace with a built-in function logsumexp
 return(Matrix[Double] soft)
 {
   max = max(M)
   ds = M - max
   sumOfexp = rowSums(exp(ds))
   soft = max + log(sumOfexp)
 }

 # compute the number of estimated parameters
 estimate_free_param = function(Integer n_components, Integer n_features, String model)
 return (Integer n_parameters)
 {
   if(model == "VVV")
     cov_param = n_components * n_features * (n_features + 1) / 2
   else if(model == "EEE")
     cov_param = n_features * (n_features + 1) / 2
   else if (model == "VVI")
     cov_param = n_components * n_features
   else if (model == "VII")
     cov_param = n_components
   else
     stop("invalid model expecting any of [VVV,EEE,VVI,VII], found "+model)
   mean_param = n_features * n_components

   n_parameters = as.integer( cov_param + mean_param + n_components - 1 )

 }

 fit = function(Matrix[Double] X, Integer n_components, String model, String init_params, Integer iter , Double reg_covar, Double tol)
 return (Matrix[Double] label, Matrix[Double] predict_prob, Double log_prob_norm)
 {

   lower_bound = 0
   converged = FALSE
   n = nrow(X)
   [weight, mean, sigma, precision_chol] = initialize_param(X, n_components,init_params, model, reg_covar, tol)
   i = 1
   while(i <= iter & !converged) {
     prev_lower_bound = lower_bound
     [log_prob_norm, log_resp] = e_step(X,weight, mean, precision_chol, model)
     [weight, mean, sigma, precision_chol] = m_step(X, log_resp, n_components, model, reg_covar)
     lower_bound = log_prob_norm
     change = lower_bound - prev_lower_bound
     if(abs(change) < tol)
       converged = TRUE
     i = i+1
   }
   [log_prob_norm, log_resp] = e_step(X,weight, mean, precision_chol, model)
   label = rowIndexMax(log_resp)
   predict_prob = exp(log_resp)
 }

 getBIC = function(Integer n, Double norm, Integer df)
 return(Double bic)
 {
   bic = -2 * norm * n + df * log(n)
 }

 # check if covariance matrix is symmetric and positive definite
 checkSPD = function(Matrix[Double] A)
 return(Boolean isSPD)
 {
   # abs(a - t(a)) <= (absoluteTolerance + relativeTolerance * abs(b))
   sym = abs(A - t(A)) <= (1e-10 * abs(t(A)))
   if(sum(sym == 0) == 0)
   {
     [eval, evec] = eigen(A);
     if(sum(eval < 0) == 0) #check positive definite
       isSPD = TRUE
     else  isSPD = FALSE
   }
   else isSPD = FALSE
 }
	#-------------------------------------------------------------
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	#
	#-------------------------------------------------------------

	# ------------------------------------------
	# Gaussian Mixture Model
	# ------------------------------------------

	# INPUT PARAMETERS:
	# ---------------------------------------------------------------------------------------------
	# NAME TYPE DEFAULT MEANING
	# ---------------------------------------------------------------------------------------------
	# X Double --- Matrix X
	# n_components Integer 3 Number of n_components in the Gaussian mixture model
	# model String "VVV" "VVV": unequal variance (full),each component has its own general covariance matrix
	# "EEE": equal variance (tied), all components share the same general covariance matrix
	# "VVI": spherical, unequal volume (diag), each component has its own diagonal covariance matrix
	# "VII": spherical, equal volume (spherical), each component has its own single variance
	# init_param String "kmeans" initialize weights with "kmeans" or "random"
	# iterations Integer 100 Number of iterations
	# reg_covar Double 1e-6 regularization parameter for covariance matrix
	# tol Double 0.000001 tolerance value for convergence
	# ---------------------------------------------------------------------------------------------


	#Output(s)
	# ---------------------------------------------------------------------------------------------
	# NAME TYPE DEFAULT MEANING
	# ---------------------------------------------------------------------------------------------
	# weight Double --- A matrix whose [i,k]th entry is the probability that observation i in the test data belongs to the kth class
	# labels Double --- Prediction matrix
	# df Integer --- Number of estimated parameters
	# bic Double --- Bayesian information criterion for best iteration




	m_gmm = function(Matrix[Double] X, Integer n_components = 1, String model = "VVV", String init_params = "kmeans", Integer iter = 100, Double reg_covar = 1e-6, Double tol = 0.000001, Boolean verbose = FALSE )
	return (Matrix[Double] weights, Matrix[Double] labels, Integer df, Double bic)
	{

	# sanity checks
	if(model != "VVV" & model != "EEE" & model != "VVI" & model != "VII")
	stop("model not supported, should be in VVV, EEE, VVI, VII");

	[labels, weights, norm] = fit(X, n_components, model, init_params, iter, reg_covar, tol)
	df = estimate_free_param(n_components, ncol(X), model)
	bic = getBIC(nrow(X),norm,df)
	}

	initialize_param = function(Matrix[Double] X, Integer n_components, String init_params, String model, Double reg_covar, Double tol)
	return (Matrix[Double] weight, Matrix[Double] mean, List[Unknown] sigma, List[Unknown] precision_chol)
	{
	# create responsibility matrix, resp[n_samples, n_components]
	resp = matrix(0, nrow(X), n_components)
	if(init_params == "kmeans")
	{
	[C, Y] = kmeans(X, n_components, 10, 10, tol, FALSE, 25)
	resp = resp + t(seq(1,n_components))
	resp = resp == Y
	}
	else if(init_params == "random")
	{
	resp = Rand(rows = nrow(X), cols=n_components)
	resp = resp/rowSums(resp)
	}
	else stop("invalid parameter value, expected kmeans or random found "+init_params)

	[weight, mean, sigma, precision_chol] = initialize(X, resp, n_components, model, reg_covar)
	}


	# Matrix/Vector Parameters
	# input: (X[n_samples, n_features], resp[n_samples, n_components])
	# output: (weight[n_samples, n_components], mean[n_components, n_features], sigma/prec_chol depends on model type)
	initialize = function(Matrix[Double] X, Matrix[Double] resp, Integer n_components, String model, Double reg_covar)
	return (Matrix[Double] weight, Matrix[Double] mean, List[Unknown] sigma, List[Unknown] precision_chol)
	{
	n = nrow(X)
	[weight, mean, sigma] = estimate_gaussian_param(X, resp, n_components, model, reg_covar)
	weight = weight/n
	precision_chol = compute_precision_cholesky(sigma, model)
	}

	estimate_gaussian_param = function(Matrix[Double] X, Matrix[Double] resp, Integer n_components, String model, Double reg_covar)
	return (Matrix[Double] weight, Matrix[Double] mean, List[Unknown] sigma)
	{
	l = list()
	n = nrow(X)
	# estimate Gaussian parameter
	nk = colSums(resp) + 2.220446049250313e-15
	mu = (t(resp) %*% X) / t(nk)
	sigma = list()
	if(model == "VVV")
	sigma = covariances_VVV(X, resp, mu, nk, reg_covar)
	else if(model == "EEE")
	sigma = covariances_EEE(X, resp, mu, nk, reg_covar)
	else if(model == "VVI")
	sigma = covariances_VVI(X, resp, mu, nk, reg_covar)
	else if (model == "VII")
	sigma = covariances_VII(X, resp, mu, nk, reg_covar)

	weight = nk
	mean = mu # n_components * n_features
	}

	# Matrix/Vector Parameters/List
	# input: (X[n_samples, n_features], resp[n_samples, n_components], mu[n_components, n_features], nk[1, n_components])
	# output: (sigma a list of length = n_components where each item in list is a covariance matrix of (n_features * n_features) dimensions)
	covariances_VVV = function(Matrix[Double] X, Matrix[Double] resp, Matrix[Double] mu, Matrix[Double] nk, Double reg_covar)
	return(List[Unknown] sigma)
	{
	sigma = list()
	for(k in 1:nrow(mu))
	{
	diff = X - mu[k,]
	cov = (t(diff * resp[, k]) %*% diff) / as.scalar(nk[1,k])
	cov = cov + diag(matrix(reg_covar, ncol(cov), 1))
	sigma = append(sigma, cov)
	}
	}

	# Matrix/Vector Parameters/List
	# input: (X[n_samples, n_features], resp[n_samples, n_components], mu[n_components, n_features], nk[1, n_components])
	# output: (sigma a list of length = 1 where item in list is a covariance matrix of (n_features * n_features) dimensions)
	covariances_EEE = function(Matrix[Double] X, Matrix[Double] resp, Matrix[Double] mu, Matrix[Double] nk, Double reg_covar)
	return(List[Unknown] sigma)
	{
	sigma = list()
	avgX2 = t(X) %*% X
	avgMean = (t(mu) * nk) %*% mu
	cov = avgX2 - avgMean
	cov = cov / sum(nk)
	cov = cov + diag(matrix(reg_covar, ncol(cov), 1))
	sigma = append(sigma, cov)
	}

	# Matrix/Vector Parameters/List
	# input: (X[n_samples, n_features], resp[n_samples, n_components], mu[n_components, n_features], nk[1, n_components])
	# output: (sigma a list of length = 1 where item in list is a covariance matrix of (n_components * n_features) dimensions)
	covariances_VVI = function(Matrix[Double] X, Matrix[Double] resp, Matrix[Double] mu, Matrix[Double] nk, Double reg_covar)
	return(List[Unknown] sigma)
	{
	sigma = list()
	avgX2 = (t(resp) %% (XX)) / t(nk)
	avgMean = mu ^ 2
	avgMean2 = mu * (t(resp) %*% X) / t(nk)
	cov = avgX2 - 2 * avgMean + avgMean2 + reg_covar
	sigma = append(sigma, cov)
	}

	# Matrix/Vector Parameters/List
	# input: (X[n_samples, n_features], resp[n_samples, n_components], mu[n_components, n_features], nk[1, n_components])
	# output: (sigma a list of length = 1 where item in list is a variance value for each component (1* n_components) dimensions)
	covariances_VII = function(Matrix[Double] X, Matrix[Double] resp, Matrix[Double] mu, Matrix[Double] nk, Double reg_covar)
	return(List[Unknown] sigma)
	{
	sigma = list()
	avgX2 = (t(resp) %% (XX)) / t(nk)
	avgMean = mu ^ 2
	avgMean2 = mu * (t(resp) %*% X) / t(nk)
	cov = avgX2 - 2 * avgMean + avgMean2 + reg_covar
	sigma = list(rowMeans(cov))
	}

	compute_precision_cholesky = function(List[Unknown] sigma, String model)
	return (List[Unknown] precision_chol)
	{
	precision_chol = list()

	if(model == "VVV") {
	comp = length(sigma)
	for(k in 1:length(sigma)) {
	cov = as.matrix(sigma[k])
	isSPD = checkSPD(cov)
	if(isSPD) {
	cov_chol = cholesky(cov)
	pre_chol = t(inv(cov_chol))
	precision_chol = append(precision_chol, pre_chol)
	} else
	stop("Fitting the mixture model failed because some components have ill-defined empirical covariance (i.e., singleton matrix or non-symmetric )."+
	"\nTry to decrease the number of components, or increase reg_covar")
	}
	}
	else if(model == "EEE") {
	cov = as.matrix(sigma[1])
	isSPD = checkSPD(cov)
	if(isSPD) {
	cov_chol = cholesky(cov)
	pre_chol = t(inv(cov_chol))
	precision_chol = append(precision_chol, pre_chol)
	} else
	stop("Fitting the mixture model failed because some components have ill-defined empirical covariance (i.e., singleton matrix or non-symmetric)."+
	"\nTry to decrease the number of components, or increase reg_covar")
	}
	else {
	cov = as.matrix(sigma[1])
	if(sum(cov <= 0) > 0)
	stop("Fitting the mixture model failed because some components have ill-defined empirical covariance (i.e., singleton matrix or non-symmetric)."+
	"\nTry to decrease the number of components, or increase reg_covar")
	else {
	precision_chol = append(precision_chol, 1.0/sqrt(cov))
	}
	}
	}

	# Expectation step
	e_step = function(Matrix[Double] X, Matrix[Double] w, Matrix[Double] mu, List[Unknown] precisions_cholesky, String model)
	return(Double norm, Matrix[Double] log_resp){
	weighted_log_prob = estimate_weighted_log_prob(X, w, mu, precisions_cholesky, model)
	log_prob_norm = logsumexp(weighted_log_prob)
	log_resp = weighted_log_prob - log_prob_norm
	norm = mean(log_prob_norm)
	}

	# maximization Step
	m_step = function(Matrix[Double] X, Matrix[Double] log_resp, Integer n_components, String model, Double reg_covar)
	return (Matrix[Double] weight, Matrix[Double] mean, List[Unknown] sigma, List[Unknown] precision_chol) {
	n = nrow(X)
	[weight, mean, sigma] = estimate_gaussian_param(X, exp(log_resp), n_components, model, reg_covar)
	weight = weight/n
	precision_chol = compute_precision_cholesky(sigma, model)
	}

	estimate_weighted_log_prob = function(Matrix[Double] X, Matrix[Double] w, Matrix[Double] mu, List[Unknown] precisions_cholesky, String model)
	return (Matrix[Double] weight_log_pro)
	{
	weight_log_pro = estimate_log_prob(X, mu, precisions_cholesky, model) + estimate_log_weights(w)
	}

	estimate_log_weights = function(Matrix[Double] w)
	return (Matrix[Double] log_weight)
	{
	log_weight = log(w)
	}

	estimate_log_prob = function(Matrix[Double] X, Matrix[Double] mu, List[Unknown] precisions_cholesky, String model)
	return (Matrix[Double] log_prob)
	{
	log_prob = estimate_log_gaussian_prob(
	X, mu, precisions_cholesky, model)
	}

	compute_log_det_cholesky = function(List[Unknown] mat_chol, String model, Integer d)
	return(Matrix[Double] log_det_cholesky)
	{
	comp = length(mat_chol)

	if(model == "VVV")
	{
	log_det_chol = matrix(0, 1, comp)
	for(k in 1:comp)
	{
	mat = as.matrix(mat_chol[k])
	log_det = sum(log(diag(t(mat)))) # have to take the log of diag elements only
	log_det_chol[1,k] = log_det
	}
	}
	else if(model == "EEE")
	{
	mat = as.matrix(mat_chol[1])
	log_det_chol = as.matrix(sum(log(diag(mat))))
	}
	else if(model == "VVI")
	{
	mat = as.matrix(mat_chol[1])
	log_det_chol = t(rowSums(log(mat)))
	}
	else if (model == "VII")
	{
	mat = as.matrix(mat_chol[1])
	log_det_chol = t(d * log(mat))
	}
	log_det_cholesky = log_det_chol
	}

	estimate_log_gaussian_prob = function(Matrix[Double] X, Matrix[Double] mu, List[Unknown] prec_chol, String model)
	return(Matrix[Double] es_log_prob ) # nrow(X) * n_components
	{
	n = nrow(X)
	d = ncol(X)
	n_components = nrow(mu)

	log_det = compute_log_det_cholesky(prec_chol, model, d)
	if(model == "VVV")
	{
	log_prob = matrix(0, n, n_components)
	for(k in 1:n_components)
	{
	prec = as.matrix(prec_chol[k])
	y = X %% prec - mu[k,] %% prec # changing here t intro: y = X %% prec - mu[k,] %% prec
	log_prob[, k] = rowSums(y*y)
	}
	}
	else if(model == "EEE")
	{
	log_prob = matrix(0, n, n_components)
	prec = as.matrix(prec_chol[1])
	for(k in 1:n_components) {
	y = X %% prec - mu[k,] %% prec
	log_prob[, k] = rowSums(yy) # TODO replace yy with squared built-in
	}
	}
	else if(model == "VVI")
	{
	prec = as.matrix(prec_chol[1])
	precisions = prec^2
	bc_matrix = matrix(1,nrow(X), nrow(mu))
	log_prob = (bc_matrixt(rowSums(mu^2 precisions)) -
	2. * (X %% t(mu precisions)) +
	X^2 %*% t(precisions))
	}
	else if (model == "VII")
	{
	prec = as.matrix(prec_chol[1])
	precisions = prec^ 2
	bc_matrix = matrix(1,nrow(X), nrow(mu))
	log_prob = (bc_matrix * t(rowSums(mu^2) * precisions) -
	2 * X %% t(mu precisions) +
	rowSums(XX) %% t(precisions) ) # TODO replace rowSums(X*X) with squared rowNorm() built-in
	}
	if(ncol(log_det) == 1)
	log_det = matrix(1, 1, ncol(log_prob)) * log_det
	es_log_prob = -.5 * (d * log(2 * pi) + log_prob) + log_det
	}

	logsumexp = function(Matrix[Double] M) # TODO replace with a built-in function logsumexp
	return(Matrix[Double] soft)
	{
	max = max(M)
	ds = M - max
	sumOfexp = rowSums(exp(ds))
	soft = max + log(sumOfexp)
	}

	# compute the number of estimated parameters
	estimate_free_param = function(Integer n_components, Integer n_features, String model)
	return (Integer n_parameters)
	{
	if(model == "VVV")
	cov_param = n_components * n_features * (n_features + 1) / 2
	else if(model == "EEE")
	cov_param = n_features * (n_features + 1) / 2
	else if (model == "VVI")
	cov_param = n_components * n_features
	else if (model == "VII")
	cov_param = n_components
	else
	stop("invalid model expecting any of [VVV,EEE,VVI,VII], found "+model)
	mean_param = n_features * n_components

	n_parameters = as.integer( cov_param + mean_param + n_components - 1 )

	}

	fit = function(Matrix[Double] X, Integer n_components, String model, String init_params, Integer iter , Double reg_covar, Double tol)
	return (Matrix[Double] label, Matrix[Double] predict_prob, Double log_prob_norm)
	{

	lower_bound = 0
	converged = FALSE
	n = nrow(X)
	[weight, mean, sigma, precision_chol] = initialize_param(X, n_components,init_params, model, reg_covar, tol)
	i = 1
	while(i <= iter & !converged) {
	prev_lower_bound = lower_bound
	[log_prob_norm, log_resp] = e_step(X,weight, mean, precision_chol, model)
	[weight, mean, sigma, precision_chol] = m_step(X, log_resp, n_components, model, reg_covar)
	lower_bound = log_prob_norm
	change = lower_bound - prev_lower_bound
	if(abs(change) < tol)
	converged = TRUE
	i = i+1
	}
	[log_prob_norm, log_resp] = e_step(X,weight, mean, precision_chol, model)
	label = rowIndexMax(log_resp)
	predict_prob = exp(log_resp)
	}

	getBIC = function(Integer n, Double norm, Integer df)
	return(Double bic)
	{
	bic = -2 * norm * n + df * log(n)
	}

	# check if covariance matrix is symmetric and positive definite
	checkSPD = function(Matrix[Double] A)
	return(Boolean isSPD)
	{
	# abs(a - t(a)) <= (absoluteTolerance + relativeTolerance * abs(b))
	sym = abs(A - t(A)) <= (1e-10 * abs(t(A)))
	if(sum(sym == 0) == 0)
	{
	[eval, evec] = eigen(A);
	if(sum(eval < 0) == 0) #check positive definite
	isSPD = TRUE
	else isSPD = FALSE
	}
	else isSPD = FALSE
	}