blob: abd6659c801cc56e8bc0ca8b128bd52e917d35cc [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# The gnmf-function does Gaussian Non-Negative Matrix Factorization. In this, a matrix X is factorized into two
# matrices W and H, such that all three matrices have no negative elements. This non-negativity makes the resulting
# matrices easier to inspect.
#
# References:
# [Chao Liu, Hung-chih Yang, Jinliang Fan, Li-Wei He, Yi-Min Wang:
# Distributed nonnegative matrix factorization for web-scale dyadic
# data analysis on mapreduce. WWW 2010: 681-690]
#
# INPUT:
# --------------------------------------------------------------------------------------
# X Matrix of feature vectors.
# rnk Number of components into which matrix X is to be factored
# eps Small positive constant added to the denominators of the update rules to avoid division by zero
# maxi Number of multiplicative update iterations
# --------------------------------------------------------------------------------------
#
# OUTPUT:
# --------------------------------------------------------------------------------------
# W Pattern matrix of dimensions nrow(X) x rnk
# H Amplitude matrix of dimensions rnk x ncol(X)
# --------------------------------------------------------------------------------------
# Factorizes X into W (nrow(X) x rnk) and H (rnk x ncol(X)) using multiplicative
# update rules, running exactly maxi update rounds.
#   X     matrix to factorize
#   rnk   number of components (columns of W, rows of H)
#   eps   constant added to each update's denominator to avoid division by zero
#   maxi  number of update rounds
m_gnmf = function(Matrix[Double] X, Integer rnk, Double eps = 1e-8, Integer maxi = 10)
  return (Matrix[Double] W, Matrix[Double] H)
{
  # Initialize W and H with small NON-NEGATIVE random values.
  # The updates below only rescale each entry by a ratio, so the sign of the
  # starting point carries through: a negative start yields negative factors
  # and can drive the denominators towards zero. Starting in [0, 0.05] keeps
  # W and H non-negative as long as X and eps are non-negative.
  W = rand(rows=nrow(X), cols=rnk, min=0, max=0.05);
  H = rand(rows=rnk, cols=ncol(X), min=0, max=0.05);

  i = 0;
  while(i < maxi) {
    # Update H first, then W using the freshly updated H.
    # eps is added to each denominator to avoid division by zero.
    H = H * ((t(W) %*% X) / (((t(W) %*% W) %*% H) + eps));
    W = W * ((X %*% t(H)) / ((W %*% (H %*% t(H))) + eps));
    i = i + 1;
  }
}