| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
| # ------------------------------------------ |
| # Gaussian Mixture Model |
| # ------------------------------------------ |
| |
| # INPUT PARAMETERS: |
| # --------------------------------------------------------------------------------------------- |
| # NAME TYPE DEFAULT MEANING |
| # --------------------------------------------------------------------------------------------- |
| # X Double --- Matrix X |
| # n_components Integer 3 Number of n_components in the Gaussian mixture model |
| # model String "VVV" "VVV": unequal variance (full),each component has its own general covariance matrix |
| # "EEE": equal variance (tied), all components share the same general covariance matrix |
| # "VVI": spherical, unequal volume (diag), each component has its own diagonal covariance matrix |
| # "VII": spherical, equal volume (spherical), each component has its own single variance |
| # init_param String "kmeans" initialize weights with "kmeans" or "random" |
| # iterations Integer 100 Number of iterations |
| # reg_covar Double 1e-6 regularization parameter for covariance matrix |
| # tol Double 0.000001 tolerance value for convergence |
| # --------------------------------------------------------------------------------------------- |
| |
| |
| #Output(s) |
| # --------------------------------------------------------------------------------------------- |
| # NAME TYPE DEFAULT MEANING |
| # --------------------------------------------------------------------------------------------- |
| # weight Double --- A matrix whose [i,k]th entry is the probability that observation i in the test data belongs to the kth class |
| # labels Double --- Prediction matrix |
| # df Integer --- Number of estimated parameters |
| # bic Double --- Bayesian information criterion for best iteration |
| |
| |
| |
| |
| m_gmm = function(Matrix[Double] X, Integer n_components = 1, String model = "VVV", String init_params = "kmeans", Integer iter = 100, Double reg_covar = 1e-6, Double tol = 0.000001, Boolean verbose = FALSE ) |
| return (Matrix[Double] weights, Matrix[Double] labels, Integer df, Double bic) |
| { |
| |
| # sanity checks |
| if(model != "VVV" & model != "EEE" & model != "VVI" & model != "VII") |
| stop("model not supported, should be in VVV, EEE, VVI, VII"); |
| |
| [labels, weights, norm] = fit(X, n_components, model, init_params, iter, reg_covar, tol) |
| df = estimate_free_param(n_components, ncol(X), model) |
| bic = getBIC(nrow(X),norm,df) |
| } |
| |
| initialize_param = function(Matrix[Double] X, Integer n_components, String init_params, String model, Double reg_covar, Double tol) |
| return (Matrix[Double] weight, Matrix[Double] mean, List[Unknown] sigma, List[Unknown] precision_chol) |
| { |
| # create responsibility matrix, resp[n_samples, n_components] |
| resp = matrix(0, nrow(X), n_components) |
| if(init_params == "kmeans") |
| { |
| [C, Y] = kmeans(X, n_components, 10, 10, tol, FALSE, 25) |
| resp = resp + t(seq(1,n_components)) |
| resp = resp == Y |
| } |
| else if(init_params == "random") |
| { |
| resp = Rand(rows = nrow(X), cols=n_components) |
| resp = resp/rowSums(resp) |
| } |
| else stop("invalid parameter value, expected kmeans or random found "+init_params) |
| |
| [weight, mean, sigma, precision_chol] = initialize(X, resp, n_components, model, reg_covar) |
| } |
| |
| |
| # Matrix/Vector Parameters |
| # input: (X[n_samples, n_features], resp[n_samples, n_components]) |
| # output: (weight[n_samples, n_components], mean[n_components, n_features], sigma/prec_chol depends on model type) |
| initialize = function(Matrix[Double] X, Matrix[Double] resp, Integer n_components, String model, Double reg_covar) |
| return (Matrix[Double] weight, Matrix[Double] mean, List[Unknown] sigma, List[Unknown] precision_chol) |
| { |
| n = nrow(X) |
| [weight, mean, sigma] = estimate_gaussian_param(X, resp, n_components, model, reg_covar) |
| weight = weight/n |
| precision_chol = compute_precision_cholesky(sigma, model) |
| } |
| |
| estimate_gaussian_param = function(Matrix[Double] X, Matrix[Double] resp, Integer n_components, String model, Double reg_covar) |
| return (Matrix[Double] weight, Matrix[Double] mean, List[Unknown] sigma) |
| { |
| l = list() |
| n = nrow(X) |
| # estimate Gaussian parameter |
| nk = colSums(resp) + 2.220446049250313e-15 |
| mu = (t(resp) %*% X) / t(nk) |
| sigma = list() |
| if(model == "VVV") |
| sigma = covariances_VVV(X, resp, mu, nk, reg_covar) |
| else if(model == "EEE") |
| sigma = covariances_EEE(X, resp, mu, nk, reg_covar) |
| else if(model == "VVI") |
| sigma = covariances_VVI(X, resp, mu, nk, reg_covar) |
| else if (model == "VII") |
| sigma = covariances_VII(X, resp, mu, nk, reg_covar) |
| |
| weight = nk |
| mean = mu # n_components * n_features |
| } |
| |
| # Matrix/Vector Parameters/List |
| # input: (X[n_samples, n_features], resp[n_samples, n_components], mu[n_components, n_features], nk[1, n_components]) |
| # output: (sigma a list of length = n_components where each item in list is a covariance matrix of (n_features * n_features) dimensions) |
| covariances_VVV = function(Matrix[Double] X, Matrix[Double] resp, Matrix[Double] mu, Matrix[Double] nk, Double reg_covar) |
| return(List[Unknown] sigma) |
| { |
| sigma = list() |
| for(k in 1:nrow(mu)) |
| { |
| diff = X - mu[k,] |
| cov = (t(diff * resp[, k]) %*% diff) / as.scalar(nk[1,k]) |
| cov = cov + diag(matrix(reg_covar, ncol(cov), 1)) |
| sigma = append(sigma, cov) |
| } |
| } |
| |
| # Matrix/Vector Parameters/List |
| # input: (X[n_samples, n_features], resp[n_samples, n_components], mu[n_components, n_features], nk[1, n_components]) |
| # output: (sigma a list of length = 1 where item in list is a covariance matrix of (n_features * n_features) dimensions) |
| covariances_EEE = function(Matrix[Double] X, Matrix[Double] resp, Matrix[Double] mu, Matrix[Double] nk, Double reg_covar) |
| return(List[Unknown] sigma) |
| { |
| sigma = list() |
| avgX2 = t(X) %*% X |
| avgMean = (t(mu) * nk) %*% mu |
| cov = avgX2 - avgMean |
| cov = cov / sum(nk) |
| cov = cov + diag(matrix(reg_covar, ncol(cov), 1)) |
| sigma = append(sigma, cov) |
| } |
| |
| # Matrix/Vector Parameters/List |
| # input: (X[n_samples, n_features], resp[n_samples, n_components], mu[n_components, n_features], nk[1, n_components]) |
| # output: (sigma a list of length = 1 where item in list is a covariance matrix of (n_components * n_features) dimensions) |
| covariances_VVI = function(Matrix[Double] X, Matrix[Double] resp, Matrix[Double] mu, Matrix[Double] nk, Double reg_covar) |
| return(List[Unknown] sigma) |
| { |
| sigma = list() |
| avgX2 = (t(resp) %*% (X*X)) / t(nk) |
| avgMean = mu ^ 2 |
| avgMean2 = mu * (t(resp) %*% X) / t(nk) |
| cov = avgX2 - 2 * avgMean + avgMean2 + reg_covar |
| sigma = append(sigma, cov) |
| } |
| |
| # Matrix/Vector Parameters/List |
| # input: (X[n_samples, n_features], resp[n_samples, n_components], mu[n_components, n_features], nk[1, n_components]) |
| # output: (sigma a list of length = 1 where item in list is a variance value for each component (1* n_components) dimensions) |
| covariances_VII = function(Matrix[Double] X, Matrix[Double] resp, Matrix[Double] mu, Matrix[Double] nk, Double reg_covar) |
| return(List[Unknown] sigma) |
| { |
| sigma = list() |
| avgX2 = (t(resp) %*% (X*X)) / t(nk) |
| avgMean = mu ^ 2 |
| avgMean2 = mu * (t(resp) %*% X) / t(nk) |
| cov = avgX2 - 2 * avgMean + avgMean2 + reg_covar |
| sigma = list(rowMeans(cov)) |
| } |
| |
| compute_precision_cholesky = function(List[Unknown] sigma, String model) |
| return (List[Unknown] precision_chol) |
| { |
| precision_chol = list() |
| |
| if(model == "VVV") { |
| comp = length(sigma) |
| for(k in 1:length(sigma)) { |
| cov = as.matrix(sigma[k]) |
| isSPD = checkSPD(cov) |
| if(isSPD) { |
| cov_chol = cholesky(cov) |
| pre_chol = t(inv(cov_chol)) |
| precision_chol = append(precision_chol, pre_chol) |
| } else |
| stop("Fitting the mixture model failed because some components have ill-defined empirical covariance (i.e., singleton matrix or non-symmetric )."+ |
| "\nTry to decrease the number of components, or increase reg_covar") |
| } |
| } |
| else if(model == "EEE") { |
| cov = as.matrix(sigma[1]) |
| isSPD = checkSPD(cov) |
| if(isSPD) { |
| cov_chol = cholesky(cov) |
| pre_chol = t(inv(cov_chol)) |
| precision_chol = append(precision_chol, pre_chol) |
| } else |
| stop("Fitting the mixture model failed because some components have ill-defined empirical covariance (i.e., singleton matrix or non-symmetric)."+ |
| "\nTry to decrease the number of components, or increase reg_covar") |
| } |
| else { |
| cov = as.matrix(sigma[1]) |
| if(sum(cov <= 0) > 0) |
| stop("Fitting the mixture model failed because some components have ill-defined empirical covariance (i.e., singleton matrix or non-symmetric)."+ |
| "\nTry to decrease the number of components, or increase reg_covar") |
| else { |
| precision_chol = append(precision_chol, 1.0/sqrt(cov)) |
| } |
| } |
| } |
| |
| # Expectation step |
| e_step = function(Matrix[Double] X, Matrix[Double] w, Matrix[Double] mu, List[Unknown] precisions_cholesky, String model) |
| return(Double norm, Matrix[Double] log_resp){ |
| weighted_log_prob = estimate_weighted_log_prob(X, w, mu, precisions_cholesky, model) |
| log_prob_norm = logsumexp(weighted_log_prob) |
| log_resp = weighted_log_prob - log_prob_norm |
| norm = mean(log_prob_norm) |
| } |
| |
| # maximization Step |
| m_step = function(Matrix[Double] X, Matrix[Double] log_resp, Integer n_components, String model, Double reg_covar) |
| return (Matrix[Double] weight, Matrix[Double] mean, List[Unknown] sigma, List[Unknown] precision_chol) { |
| n = nrow(X) |
| [weight, mean, sigma] = estimate_gaussian_param(X, exp(log_resp), n_components, model, reg_covar) |
| weight = weight/n |
| precision_chol = compute_precision_cholesky(sigma, model) |
| } |
| |
| estimate_weighted_log_prob = function(Matrix[Double] X, Matrix[Double] w, Matrix[Double] mu, List[Unknown] precisions_cholesky, String model) |
| return (Matrix[Double] weight_log_pro) |
| { |
| weight_log_pro = estimate_log_prob(X, mu, precisions_cholesky, model) + estimate_log_weights(w) |
| } |
| |
| estimate_log_weights = function(Matrix[Double] w) |
| return (Matrix[Double] log_weight) |
| { |
| log_weight = log(w) |
| } |
| |
| estimate_log_prob = function(Matrix[Double] X, Matrix[Double] mu, List[Unknown] precisions_cholesky, String model) |
| return (Matrix[Double] log_prob) |
| { |
| log_prob = estimate_log_gaussian_prob( |
| X, mu, precisions_cholesky, model) |
| } |
| |
| compute_log_det_cholesky = function(List[Unknown] mat_chol, String model, Integer d) |
| return(Matrix[Double] log_det_cholesky) |
| { |
| comp = length(mat_chol) |
| |
| if(model == "VVV") |
| { |
| log_det_chol = matrix(0, 1, comp) |
| for(k in 1:comp) |
| { |
| mat = as.matrix(mat_chol[k]) |
| log_det = sum(log(diag(t(mat)))) # have to take the log of diag elements only |
| log_det_chol[1,k] = log_det |
| } |
| } |
| else if(model == "EEE") |
| { |
| mat = as.matrix(mat_chol[1]) |
| log_det_chol = as.matrix(sum(log(diag(mat)))) |
| } |
| else if(model == "VVI") |
| { |
| mat = as.matrix(mat_chol[1]) |
| log_det_chol = t(rowSums(log(mat))) |
| } |
| else if (model == "VII") |
| { |
| mat = as.matrix(mat_chol[1]) |
| log_det_chol = t(d * log(mat)) |
| } |
| log_det_cholesky = log_det_chol |
| } |
| |
| estimate_log_gaussian_prob = function(Matrix[Double] X, Matrix[Double] mu, List[Unknown] prec_chol, String model) |
| return(Matrix[Double] es_log_prob ) # nrow(X) * n_components |
| { |
| n = nrow(X) |
| d = ncol(X) |
| n_components = nrow(mu) |
| |
| log_det = compute_log_det_cholesky(prec_chol, model, d) |
| if(model == "VVV") |
| { |
| log_prob = matrix(0, n, n_components) |
| for(k in 1:n_components) |
| { |
| prec = as.matrix(prec_chol[k]) |
| y = X %*% prec - mu[k,] %*% prec # changing here t intro: y = X %*% prec - mu[k,] %*% prec |
| log_prob[, k] = rowSums(y*y) |
| } |
| } |
| else if(model == "EEE") |
| { |
| log_prob = matrix(0, n, n_components) |
| prec = as.matrix(prec_chol[1]) |
| for(k in 1:n_components) { |
| y = X %*% prec - mu[k,] %*% prec |
| log_prob[, k] = rowSums(y*y) # TODO replace y*y with squared built-in |
| } |
| } |
| else if(model == "VVI") |
| { |
| prec = as.matrix(prec_chol[1]) |
| precisions = prec^2 |
| bc_matrix = matrix(1,nrow(X), nrow(mu)) |
| log_prob = (bc_matrix*t(rowSums(mu^2 * precisions)) - |
| 2. * (X %*% t(mu * precisions)) + |
| X^2 %*% t(precisions)) |
| } |
| else if (model == "VII") |
| { |
| prec = as.matrix(prec_chol[1]) |
| precisions = prec^ 2 |
| bc_matrix = matrix(1,nrow(X), nrow(mu)) |
| log_prob = (bc_matrix * t(rowSums(mu^2) * precisions) - |
| 2 * X %*% t(mu * precisions) + |
| rowSums(X*X) %*% t(precisions) ) # TODO replace rowSums(X*X) with squared rowNorm() built-in |
| } |
| if(ncol(log_det) == 1) |
| log_det = matrix(1, 1, ncol(log_prob)) * log_det |
| es_log_prob = -.5 * (d * log(2 * pi) + log_prob) + log_det |
| } |
| |
| logsumexp = function(Matrix[Double] M) # TODO replace with a built-in function logsumexp |
| return(Matrix[Double] soft) |
| { |
| max = max(M) |
| ds = M - max |
| sumOfexp = rowSums(exp(ds)) |
| soft = max + log(sumOfexp) |
| } |
| |
| # compute the number of estimated parameters |
| estimate_free_param = function(Integer n_components, Integer n_features, String model) |
| return (Integer n_parameters) |
| { |
| if(model == "VVV") |
| cov_param = n_components * n_features * (n_features + 1) / 2 |
| else if(model == "EEE") |
| cov_param = n_features * (n_features + 1) / 2 |
| else if (model == "VVI") |
| cov_param = n_components * n_features |
| else if (model == "VII") |
| cov_param = n_components |
| else |
| stop("invalid model expecting any of [VVV,EEE,VVI,VII], found "+model) |
| mean_param = n_features * n_components |
| |
| n_parameters = as.integer( cov_param + mean_param + n_components - 1 ) |
| |
| } |
| |
| fit = function(Matrix[Double] X, Integer n_components, String model, String init_params, Integer iter , Double reg_covar, Double tol) |
| return (Matrix[Double] label, Matrix[Double] predict_prob, Double log_prob_norm) |
| { |
| |
| lower_bound = 0 |
| converged = FALSE |
| n = nrow(X) |
| [weight, mean, sigma, precision_chol] = initialize_param(X, n_components,init_params, model, reg_covar, tol) |
| i = 1 |
| while(i <= iter & !converged) { |
| prev_lower_bound = lower_bound |
| [log_prob_norm, log_resp] = e_step(X,weight, mean, precision_chol, model) |
| [weight, mean, sigma, precision_chol] = m_step(X, log_resp, n_components, model, reg_covar) |
| lower_bound = log_prob_norm |
| change = lower_bound - prev_lower_bound |
| if(abs(change) < tol) |
| converged = TRUE |
| i = i+1 |
| } |
| [log_prob_norm, log_resp] = e_step(X,weight, mean, precision_chol, model) |
| label = rowIndexMax(log_resp) |
| predict_prob = exp(log_resp) |
| } |
| |
| getBIC = function(Integer n, Double norm, Integer df) |
| return(Double bic) |
| { |
| bic = -2 * norm * n + df * log(n) |
| } |
| |
| # check if covariance matrix is symmetric and positive definite |
| checkSPD = function(Matrix[Double] A) |
| return(Boolean isSPD) |
| { |
| # abs(a - t(a)) <= (absoluteTolerance + relativeTolerance * abs(b)) |
| sym = abs(A - t(A)) <= (1e-10 * abs(t(A))) |
| if(sum(sym == 0) == 0) |
| { |
| [eval, evec] = eigen(A); |
| if(sum(eval < 0) == 0) #check positive definite |
| isSPD = TRUE |
| else isSPD = FALSE |
| } |
| else isSPD = FALSE |
| } |
| |