src/ports/postgres/modules/svd_mf/svdmf.sql_in - madlib - Git at Google

 /* ----------------------------------------------------------------------- *//**
  *
  * @file svdmf.sql_in
  *
  * @brief SQL functions for SVD Matrix Factorization
  * @date   January 2011
  *
  * @sa For a brief introduction to SVD Matrix Factorization, see the module
  *     description \ref grp_svd.
  *
  *//* ----------------------------------------------------------------------- */

 m4_include(`SQLCommon.m4')

 /**
 @addtogroup grp_svdmf

 \warning <em> This is an old implementation of Matrix Decomposition and
 has been deprecated. For SVD decomposition, please see \ref grp_svd;
 for the latest version of low-rank approximation, please see \ref grp_lmf</em>

 <div class="toc"><b>Contents</b>
 <ul>
 <li><a href="#syntax">SVD Function Syntax</a></li>
 <li><a href="#xamples">Examples</a></li>
 <li><a href="#literature">Literature</a></li>
 <li><a href="#related">Related Topics</a></li>
 </ul>
 </div>

 @brief Computes low-rank approximation of a sparse matrix.

 This module implements "partial SVD decomposition" method for representing a sparse matrix using a low-rank approximation.
 Mathematically, this algorithm seeks to find matrices U and V that, for any given A, minimizes:\n
 \f[ ||\boldsymbol A - \boldsymbol UV ||_2
 \f]
 subject to \f$ rank(\boldsymbol UV) \leq k \f$, where \f$ ||\cdot||_2 \f$ denotes the Frobenius norm and \f$ k \leq rank(\boldsymbol A)\f$.
 If A is \f$ m \times n \f$, then U will be \f$ m \times k \f$ and V will be \f$ k \times n \f$.

 This algorithm is not intended to do the full decomposition, or to be used as part of
 inverse procedure. It effectively computes the SVD of a low-rank approximation of A (preferably sparse), with the singular values absorbed in U and V.
 Code is based on the write-up as appears at [1], with some modifications.


 @anchor syntax
 @par Function Syntax

 The SVD function is called as follows:
 <pre class="syntax">
 svdmf_run( input_table,
            col_name,
            row_name,
            value, num_features)
 </pre>

 The <b>input matrix</b> is expected to be of the following form:
 <pre>{TABLE|VIEW} <em>input_table</em> (
     <em>col_num</em> INTEGER,
     <em>row_num</em> INTEGER,
   <em>value</em> FLOAT
 )</pre>

 Input is contained in a table where column number and row number for each cell
 are sequential; that is to say that if the data was written as a matrix, those values would be the
 actual row and column numbers and not some random identifiers. All rows and columns must be associated with a value.
 There should not be any missing row, columns or values.

 The function returns two tables \c matrix_u and \c matrix_v, which represent the matrices U and V in table format.

 @anchor examples
 @examp

 -# Prepare an input table/view.
 <pre class="example">
 CREATE TABLE svd_test ( col INT,
                         row INT,
                         val FLOAT
                       );
 </pre>
 -# Populate the input table with some data.
 <pre class="example">
 INSERT INTO svd_test SELECT ( g.a%1000)+1, g.a/1000+1, random()
                      FROM generate_series(1,1000) AS g(a);
 </pre>
 -# Call the svdmf_run() stored procedure.
 <pre class="example">
 SELECT madlib.svdmf_run( 'svd_test',
                          'col',
                          'row',
                          'val',
                          3);
 </pre>
 Example result:
 <pre class="result">
 INFO:  ('Started svdmf_run() with parameters:',)
 INFO:  (' * input_matrix = madlib_svdsparse_test.test',)
 INFO:  (' * col_name = col_num',)
 INFO:  (' * row_name = row_num',)
 INFO:  (' * value = val',)
 INFO:  (' * num_features = 3',)
 INFO:  ('Copying the source data into a temporary table...',)
 INFO:  ('Estimating feature: 1',)
 INFO:  ('...Iteration 1: residual_error = 33345014611.1, step_size = 4.9997500125e-10, min_improvement = 1.0',)
 INFO:  ('...Iteration 2: residual_error = 33345014557.6, step_size = 5.49972501375e-10, min_improvement = 1.0',)
 INFO:  ('...Iteration 3: residual_error = 33345014054.3, step_size = 6.04969751512e-10, min_improvement = 1.0',)
 ...
 INFO:  ('...Iteration 78: residual_error = 2.02512133868, step_size = 5.78105354457e-10, min_improvement = 1.0',)
 INFO:  ('...Iteration 79: residual_error = 0.893810181282, step_size = 6.35915889903e-10, min_improvement = 1.0',)
 INFO:  ('...Iteration 80: residual_error = 0.34496773222, step_size = 6.99507478893e-10, min_improvement = 1.0',)
 INFO:  ('Swapping residual error matrix...',)
                                          svdmf_run
 &nbsp;-------------------------------------------------------------------------------------------

  Finished SVD matrix factorisation for madlib_svdsparse_test.test (row_num, col_num, val).
  Results:
   * total error = 0.34496773222
   * number of estimated features = 1
  Output:
   * table : madlib.matrix_u
   * table : madlib.matrix_v
  Time elapsed: 4 minutes 47.86839 seconds.
 </pre>

 @anchor literature
 @literature

 [1] Simon Funk, Netflix Update: Try This at Home, December 11 2006,
     http://sifter.org/~simon/journal/20061211.html


 @anchor related
 @par Related Topics
 File svdmf.sql_in documenting the SQL functions.

 @internal
 @sa namespace svdmf (documenting the implementation in Python)
 @endinternal

 */

 /**
  * @brief Partial SVD decomposition of a sparse matrix into U and V components
  *
  * This function takes as input the table representation of a sparse matrix and
  * decomposes it into the specified set of most significant features of matrices
  * of U and V matrix.
  *
  *   @param input_table Name of the table/view with the source data
  *   @param col_name Name of the column containing cell column number
  *   @param row_name Name of the column containing cell row number
  *   @param value Name of the column containing cell value
  *   @param num_features Rank of desired approximation
  *
  */
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svdmf_run(
     input_table TEXT, col_name TEXT, row_name TEXT, value TEXT, num_features INT
 )
 RETURNS TEXT
 AS $$

     PythonFunctionBodyOnly(`svd_mf', `svdmf')

     # schema_madlib comes from PythonFunctionBodyOnly
     return svdmf.svdmf_run( schema_madlib, input_table, col_name, row_name, value, num_features);

 $$ LANGUAGE plpythonu VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 /**
  * @brief Partial SVD decomposition of a sparse matrix into U and V components
  *
  * This function takes as input the table representation of a sparse matrix and
  * decomposes it into the specified set of most significant features of matrices
  * of U and V matrix.
  *
  *   @param input_table Name of the table/view with the source data
  *   @param col_name Name of the column containing cell column number
  *   @param row_name Name of the column containing cell row number
  *   @param value Name of the column containing cell value
  *   @param num_features Rank of desired approximation
  *   @param num_iterations Maximum number if iterations to perform regardless of convergence
  *   @param min_error Acceptable level of error in convergence.
  *
  */

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svdmf_run(
     input_table TEXT, col_name TEXT, row_name TEXT, value TEXT, num_features INT, num_iterations INT, min_error FLOAT
 )
 RETURNS TEXT
 AS $$

     PythonFunctionBodyOnly(`svd_mf', `svdmf')

     # schema_madlib comes from PythonFunctionBodyOnly
     return svdmf.svdmf_run_full( schema_madlib, input_table, col_name, row_name, value, num_features, num_iterations, min_error);

 $$ LANGUAGE plpythonu VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
	/* ----------------------------------------------------------------------- //*
	*
	* @file svdmf.sql_in
	*
	* @brief SQL functions for SVD Matrix Factorization
	* @date January 2011
	*
	* @sa For a brief introduction to SVD Matrix Factorization, see the module
	* description \ref grp_svd.
	*
	// ----------------------------------------------------------------------- */

	m4_include(`SQLCommon.m4')

	/**
	@addtogroup grp_svdmf

	\warning <em> This is an old implementation of Matrix Decomposition and
	has been deprecated. For SVD decomposition, please see \ref grp_svd;
	for the latest version of low-rank approximation, please see \ref grp_lmf</em>

	<div class="toc"><b>Contents</b>
	<ul>
	<li><a href="#syntax">SVD Function Syntax</a></li>
	<li><a href="#xamples">Examples</a></li>
	<li><a href="#literature">Literature</a></li>
	<li><a href="#related">Related Topics</a></li>
	</ul>
	</div>

	@brief Computes low-rank approximation of a sparse matrix.

	This module implements "partial SVD decomposition" method for representing a sparse matrix using a low-rank approximation.
	Mathematically, this algorithm seeks to find matrices U and V that, for any given A, minimizes:\n
	\f[ \|\|\boldsymbol A - \boldsymbol UV \|\|_2
	\f]
	subject to \f$ rank(\boldsymbol UV) \leq k \f$, where \f$ \|\|\cdot\|\|_2 \f$ denotes the Frobenius norm and \f$ k \leq rank(\boldsymbol A)\f$.
	If A is \f$ m \times n \f$, then U will be \f$ m \times k \f$ and V will be \f$ k \times n \f$.

	This algorithm is not intended to do the full decomposition, or to be used as part of
	inverse procedure. It effectively computes the SVD of a low-rank approximation of A (preferably sparse), with the singular values absorbed in U and V.
	Code is based on the write-up as appears at [1], with some modifications.



	@anchor syntax
	@par Function Syntax

	The SVD function is called as follows:
	<pre class="syntax">
	svdmf_run( input_table,
	col_name,
	row_name,
	value, num_features)
	</pre>

	The <b>input matrix</b> is expected to be of the following form:
	<pre>{TABLE\|VIEW} <em>input_table</em> (
	<em>col_num</em> INTEGER,
	<em>row_num</em> INTEGER,
	<em>value</em> FLOAT
	)</pre>

	Input is contained in a table where column number and row number for each cell
	are sequential; that is to say that if the data was written as a matrix, those values would be the
	actual row and column numbers and not some random identifiers. All rows and columns must be associated with a value.
	There should not be any missing row, columns or values.

	The function returns two tables \c matrix_u and \c matrix_v, which represent the matrices U and V in table format.

	@anchor examples
	@examp

	-# Prepare an input table/view.
	<pre class="example">
	CREATE TABLE svd_test ( col INT,
	row INT,
	val FLOAT
	);
	</pre>
	-# Populate the input table with some data.
	<pre class="example">
	INSERT INTO svd_test SELECT ( g.a%1000)+1, g.a/1000+1, random()
	FROM generate_series(1,1000) AS g(a);
	</pre>
	-# Call the svdmf_run() stored procedure.
	<pre class="example">
	SELECT madlib.svdmf_run( 'svd_test',
	'col',
	'row',
	'val',
	3);
	</pre>
	Example result:
	<pre class="result">
	INFO: ('Started svdmf_run() with parameters:',)
	INFO: (' * input_matrix = madlib_svdsparse_test.test',)
	INFO: (' * col_name = col_num',)
	INFO: (' * row_name = row_num',)
	INFO: (' * value = val',)
	INFO: (' * num_features = 3',)
	INFO: ('Copying the source data into a temporary table...',)
	INFO: ('Estimating feature: 1',)
	INFO: ('...Iteration 1: residual_error = 33345014611.1, step_size = 4.9997500125e-10, min_improvement = 1.0',)
	INFO: ('...Iteration 2: residual_error = 33345014557.6, step_size = 5.49972501375e-10, min_improvement = 1.0',)
	INFO: ('...Iteration 3: residual_error = 33345014054.3, step_size = 6.04969751512e-10, min_improvement = 1.0',)
	...
	INFO: ('...Iteration 78: residual_error = 2.02512133868, step_size = 5.78105354457e-10, min_improvement = 1.0',)
	INFO: ('...Iteration 79: residual_error = 0.893810181282, step_size = 6.35915889903e-10, min_improvement = 1.0',)
	INFO: ('...Iteration 80: residual_error = 0.34496773222, step_size = 6.99507478893e-10, min_improvement = 1.0',)
	INFO: ('Swapping residual error matrix...',)
	svdmf_run
	-------------------------------------------------------------------------------------------

	Finished SVD matrix factorisation for madlib_svdsparse_test.test (row_num, col_num, val).
	Results:
	* total error = 0.34496773222
	* number of estimated features = 1
	Output:
	* table : madlib.matrix_u
	* table : madlib.matrix_v
	Time elapsed: 4 minutes 47.86839 seconds.
	</pre>

	@anchor literature
	@literature

	[1] Simon Funk, Netflix Update: Try This at Home, December 11 2006,
	http://sifter.org/~simon/journal/20061211.html


	@anchor related
	@par Related Topics
	File svdmf.sql_in documenting the SQL functions.

	@internal
	@sa namespace svdmf (documenting the implementation in Python)
	@endinternal

	*/

	/**
	* @brief Partial SVD decomposition of a sparse matrix into U and V components
	*
	* This function takes as input the table representation of a sparse matrix and
	* decomposes it into the specified set of most significant features of matrices
	* of U and V matrix.
	*
	* @param input_table Name of the table/view with the source data
	* @param col_name Name of the column containing cell column number
	* @param row_name Name of the column containing cell row number
	* @param value Name of the column containing cell value
	* @param num_features Rank of desired approximation
	*
	*/
	CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svdmf_run(
	input_table TEXT, col_name TEXT, row_name TEXT, value TEXT, num_features INT
	)
	RETURNS TEXT
	AS $$

	PythonFunctionBodyOnly(`svd_mf', `svdmf')

	# schema_madlib comes from PythonFunctionBodyOnly
	return svdmf.svdmf_run( schema_madlib, input_table, col_name, row_name, value, num_features);

	$$ LANGUAGE plpythonu VOLATILE
	m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

	/**
	* @brief Partial SVD decomposition of a sparse matrix into U and V components
	*
	* This function takes as input the table representation of a sparse matrix and
	* decomposes it into the specified set of most significant features of matrices
	* of U and V matrix.
	*
	* @param input_table Name of the table/view with the source data
	* @param col_name Name of the column containing cell column number
	* @param row_name Name of the column containing cell row number
	* @param value Name of the column containing cell value
	* @param num_features Rank of desired approximation
	* @param num_iterations Maximum number if iterations to perform regardless of convergence
	* @param min_error Acceptable level of error in convergence.
	*
	*/

	CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svdmf_run(
	input_table TEXT, col_name TEXT, row_name TEXT, value TEXT, num_features INT, num_iterations INT, min_error FLOAT
	)
	RETURNS TEXT
	AS $$

	PythonFunctionBodyOnly(`svd_mf', `svdmf')

	# schema_madlib comes from PythonFunctionBodyOnly
	return svdmf.svdmf_run_full( schema_madlib, input_table, col_name, row_name, value, num_features, num_iterations, min_error);

	$$ LANGUAGE plpythonu VOLATILE
	m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');