| /* ----------------------------------------------------------------------- *//** |
| * |
| * @file svdmf.sql_in |
| * |
| * @brief SQL functions for SVD Matrix Factorization |
| * @date January 2011 |
| * |
| * @sa For a brief introduction to SVD Matrix Factorization, see the module |
| * description \ref grp_svd. |
| * |
| *//* ----------------------------------------------------------------------- */ |
| |
| m4_include(`SQLCommon.m4') |
| |
| /** |
| @addtogroup grp_svdmf |
| |
| \warning <em> This is an old implementation of Matrix Decomposition and |
| has been deprecated. For SVD decomposition, please see \ref grp_svd; |
| for the latest version of low-rank approximation, please see \ref grp_lmf</em> |
| |
| <div class="toc"><b>Contents</b> |
| <ul> |
| <li><a href="#syntax">SVD Function Syntax</a></li> |
| <li><a href="#xamples">Examples</a></li> |
| <li><a href="#literature">Literature</a></li> |
| <li><a href="#related">Related Topics</a></li> |
| </ul> |
| </div> |
| |
| @brief Computes low-rank approximation of a sparse matrix. |
| |
| This module implements "partial SVD decomposition" method for representing a sparse matrix using a low-rank approximation. |
| Mathematically, this algorithm seeks to find matrices U and V that, for any given A, minimizes:\n |
| \f[ ||\boldsymbol A - \boldsymbol UV ||_2 |
| \f] |
| subject to \f$ rank(\boldsymbol UV) \leq k \f$, where \f$ ||\cdot||_2 \f$ denotes the Frobenius norm and \f$ k \leq rank(\boldsymbol A)\f$. |
| If A is \f$ m \times n \f$, then U will be \f$ m \times k \f$ and V will be \f$ k \times n \f$. |
| |
| This algorithm is not intended to do the full decomposition, or to be used as part of |
| inverse procedure. It effectively computes the SVD of a low-rank approximation of A (preferably sparse), with the singular values absorbed in U and V. |
| Code is based on the write-up as appears at [1], with some modifications. |
| |
| |
| |
| @anchor syntax |
| @par Function Syntax |
| |
| The SVD function is called as follows: |
| <pre class="syntax"> |
| svdmf_run( input_table, |
| col_name, |
| row_name, |
| value, num_features) |
| </pre> |
| |
| The <b>input matrix</b> is expected to be of the following form: |
| <pre>{TABLE|VIEW} <em>input_table</em> ( |
| <em>col_num</em> INTEGER, |
| <em>row_num</em> INTEGER, |
| <em>value</em> FLOAT |
| )</pre> |
| |
| Input is contained in a table where column number and row number for each cell |
| are sequential; that is to say that if the data was written as a matrix, those values would be the |
| actual row and column numbers and not some random identifiers. All rows and columns must be associated with a value. |
| There should not be any missing row, columns or values. |
| |
| The function returns two tables \c matrix_u and \c matrix_v, which represent the matrices U and V in table format. |
| |
| @anchor examples |
| @examp |
| |
| -# Prepare an input table/view. |
| <pre class="example"> |
| CREATE TABLE svd_test ( col INT, |
| row INT, |
| val FLOAT |
| ); |
| </pre> |
| -# Populate the input table with some data. |
| <pre class="example"> |
| INSERT INTO svd_test SELECT ( g.a%1000)+1, g.a/1000+1, random() |
| FROM generate_series(1,1000) AS g(a); |
| </pre> |
| -# Call the svdmf_run() stored procedure. |
| <pre class="example"> |
| SELECT madlib.svdmf_run( 'svd_test', |
| 'col', |
| 'row', |
| 'val', |
| 3); |
| </pre> |
| Example result: |
| <pre class="result"> |
| INFO: ('Started svdmf_run() with parameters:',) |
| INFO: (' * input_matrix = madlib_svdsparse_test.test',) |
| INFO: (' * col_name = col_num',) |
| INFO: (' * row_name = row_num',) |
| INFO: (' * value = val',) |
| INFO: (' * num_features = 3',) |
| INFO: ('Copying the source data into a temporary table...',) |
| INFO: ('Estimating feature: 1',) |
| INFO: ('...Iteration 1: residual_error = 33345014611.1, step_size = 4.9997500125e-10, min_improvement = 1.0',) |
| INFO: ('...Iteration 2: residual_error = 33345014557.6, step_size = 5.49972501375e-10, min_improvement = 1.0',) |
| INFO: ('...Iteration 3: residual_error = 33345014054.3, step_size = 6.04969751512e-10, min_improvement = 1.0',) |
| ... |
| INFO: ('...Iteration 78: residual_error = 2.02512133868, step_size = 5.78105354457e-10, min_improvement = 1.0',) |
| INFO: ('...Iteration 79: residual_error = 0.893810181282, step_size = 6.35915889903e-10, min_improvement = 1.0',) |
| INFO: ('...Iteration 80: residual_error = 0.34496773222, step_size = 6.99507478893e-10, min_improvement = 1.0',) |
| INFO: ('Swapping residual error matrix...',) |
| svdmf_run |
| ------------------------------------------------------------------------------------------- |
| |
| Finished SVD matrix factorisation for madlib_svdsparse_test.test (row_num, col_num, val). |
| Results: |
| * total error = 0.34496773222 |
| * number of estimated features = 1 |
| Output: |
| * table : madlib.matrix_u |
| * table : madlib.matrix_v |
| Time elapsed: 4 minutes 47.86839 seconds. |
| </pre> |
| |
| @anchor literature |
| @literature |
| |
| [1] Simon Funk, Netflix Update: Try This at Home, December 11 2006, |
| http://sifter.org/~simon/journal/20061211.html |
| |
| |
| @anchor related |
| @par Related Topics |
| File svdmf.sql_in documenting the SQL functions. |
| |
| @internal |
| @sa namespace svdmf (documenting the implementation in Python) |
| @endinternal |
| |
| */ |
| |
| /** |
| * @brief Partial SVD decomposition of a sparse matrix into U and V components |
| * |
| * This function takes as input the table representation of a sparse matrix and |
| * decomposes it into the specified set of most significant features of matrices |
| * of U and V matrix. |
| * |
| * @param input_table Name of the table/view with the source data |
| * @param col_name Name of the column containing cell column number |
| * @param row_name Name of the column containing cell row number |
| * @param value Name of the column containing cell value |
| * @param num_features Rank of desired approximation |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svdmf_run( |
| input_table TEXT, col_name TEXT, row_name TEXT, value TEXT, num_features INT |
| ) |
| RETURNS TEXT |
| AS $$ |
| |
| PythonFunctionBodyOnly(`svd_mf', `svdmf') |
| |
| # schema_madlib comes from PythonFunctionBodyOnly |
| return svdmf.svdmf_run( schema_madlib, input_table, col_name, row_name, value, num_features); |
| |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| /** |
| * @brief Partial SVD decomposition of a sparse matrix into U and V components |
| * |
| * This function takes as input the table representation of a sparse matrix and |
| * decomposes it into the specified set of most significant features of matrices |
| * of U and V matrix. |
| * |
| * @param input_table Name of the table/view with the source data |
| * @param col_name Name of the column containing cell column number |
| * @param row_name Name of the column containing cell row number |
| * @param value Name of the column containing cell value |
| * @param num_features Rank of desired approximation |
| * @param num_iterations Maximum number if iterations to perform regardless of convergence |
| * @param min_error Acceptable level of error in convergence. |
| * |
| */ |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svdmf_run( |
| input_table TEXT, col_name TEXT, row_name TEXT, value TEXT, num_features INT, num_iterations INT, min_error FLOAT |
| ) |
| RETURNS TEXT |
| AS $$ |
| |
| PythonFunctionBodyOnly(`svd_mf', `svdmf') |
| |
| # schema_madlib comes from PythonFunctionBodyOnly |
| return svdmf.svdmf_run_full( schema_madlib, input_table, col_name, row_name, value, num_features, num_iterations, min_error); |
| |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |