src/ports/postgres/modules/linalg/svd.sql_in - madlib - Git at Google

 /* ----------------------------------------------------------------------- *//**
  *
  * @file svd.sql_in
  *
  * @brief Singular Value Decomposition
  *
  * @sa For a brief introduction to singular value decomposition, see the module
  *     description \ref grp_svd.
  *
  *//* ----------------------------------------------------------------------- */

 m4_include(`SQLCommon.m4')

 /**
 @addtogroup grp_svd

 <div class="toc"><b>Contents</b>
 <ul>
 <li><a href="#syntax">SVD Functions</a></li>
 <li><a href="#output">Output Tables</a></li>
 <li><a href="#examples">Examples</a><li>
 <li><a href="#background">Technical Background</a></li>
 </ul>
 </div>

 @brief Performs factorization of dense, sparse, and block matrices.

 In linear algebra, the singular value decomposition (SVD) is a factorization of
 a real or complex matrix, with many useful applications in signal processing and
 statistics.

 Let \f$A\f$ be a \f$mxn\f$ matrix, where \f$m \ge n\f$. Then \f$A\f$ can be
 decomposed as follows:
 \f[
     A = U \Sigma V^T,
 \f]
 where \f$U\f$ is a \f$m \times n\f$ orthonormal matrix,
 \f$\Sigma\f$ is a \f$n \times n\f$ diagonal matrix, and \f$V\f$ is an
 \f$n \times n\f$ orthonormal matrix. The diagonal elements of \f$\Sigma\f$ are
 called the \e {singular values}.

 @anchor syntax
 @par SVD Functions

 SVD factorizations are provided for dense matrices, sparse matrices, and block
 matrices. In addition, a native implementation is provided for sparse
 matrices for improved performance.

 <b>SVD Function for Dense Matrices</b>

 <pre class="syntax">
 svd( source_table,
      output_table_prefix,
      row_id,
      k,
      n_iterations,
      result_summary_table
 );
 </pre>
 \b Arguments
 <dl class="arglist">
     <dt>source_table</dt>
     <dd>TEXT. Source table name (dense matrix).

     The table contains a \c row_id column that identifies each row.
     Further, the other columns are assumed to be the data for the matrix
     represented in two possible forms, illustrated by the following 2x2 matrix example:
     -# <pre class="example">
             row_id     col1     col2
 row1         1           1         0
 row2         2           0         1
     </pre>
     -# <pre class="example">
         row_id     row_vec
 row1        1       {1, 0}
 row2        2       {0, 1}
     </pre>
     </dd>
     <dt>output_table_prefix</dt>
     <dd>TEXT. Prefix for output tables. See <a href="#output">Output Tables</a>.</dd>
     <dt>row_id</dt>
     <dd>TEXT. ID for each row.</dd>
     <dt>k</dt>
     <dd>INTEGER. Number of singular vectors to compute.</dd>
     <dt>n_iterations (optional)</dt>
     <dd>INTEGER. Number of iterations to run.</dd>
     <dt>result_summary_table (optional)</dt>
     <dd>TEXT. The name of the table to store result summary.</dd>
 </dl>

 <hr>
 <b>SVD Function for Sparse Matrix input</b>

 Use this function for matrices that are represented in the sparse-matrix
 format (example below). <b>The input matrix is converted to a dense matrix
 before the SVD operation.</b>

 <pre class="syntax">
 svd_sparse( source_table,
             output_table_prefix,
             row_id,
             col_id,
             value,
             row_dim,
             col_dim,
             k,
             n_iterations,
             result_summary_table
           );
 </pre>
 \b Arguments
 <dl class="arglist">
 <dt>source_table</dt>
 <dd>TEXT. Source table name (sparse matrix).

 An example sparse matrix representation is given below:
 <pre class="example">
        row_id    col_id    value
 row1      1         1        2
 row2      2         1        1
 row3      3         2        1
 </pre>
 The \c row_id represents the row number, \c col_id represents the column
 number and the \c value represents the matrix value at [\c row_id, \c col_id].
 The \c row_id and \c col_id values are indexed starting from 0. Thus the
 \c row_id ranges from 1 to \c row_dim, while the \c col_id ranges from 1 to
 \c col_dim
 </dd>

 <dt>output_table_prefix</dt>
 <dd>TEXT. Prefix for output tables. See <a href="#output">Output Tables</a>.</dd>
 <dt>row_id</dt>
 <dd>TEXT. Name of the column containing the row index for each entry in sparse matrix.</dd>
 <dt>col_id</dt>
 <dd>TEXT. Name of the column containing the column index for each entry in sparse matrix.</dd>
 <dt>value</dt>
 <dd>TEXT. Name of column containing the non-zero values of the sparse matrix.</dd>
 <dt>row_dim</dt>
 <dd>INTEGER. Number of rows in matrix.</dd>
 <dt>col_dim</dt>
 <dd>INTEGER. Number of columns in matrix.</dd>
 <dt>k</dt>
 <dd>INTEGER. Number of singular vectors to compute.</dd>
 <dt>n_iterations (optional)</dt>
 <dd>INTEGER. Number of iterations to run.</dd>
 <dt>result_summary_table (optional)</dt>
 <dd>TEXT, default: NULL. The name of the table to store a summary of the results.</dd>
 </dl>


 <hr>
 <b>Native implementation for sparse matrix</b>

 Use this function for matrices that are represented in the sparse-matrix
 format (example below). This function use the native sparse representation while
 computing the SVD. <b>This function should be favored if the matrix is highly sparse.</b>

 <pre class="syntax">
 svd_sparse_native( source_table,
                    output_table_prefix,
                    row_id,
                    col_id,
                    value,
                    row_dim,
                    col_dim,
                    k,
                    n_iterations,
                    result_summary_table
                  );
 </pre>
 \b Arguments
 <dl class="arglist">
     <dt>source_table</dt>
     <dd>TEXT. Source table name (sparse matrix - see example above).</dd>
     <dt>output_table_prefix</dt>
     <dd>TEXT. Prefix for output tables. See <a href="#output">Output Tables</a>.</dd>
     <dt>row_id</dt>
     <dd>TEXT. ID for each row.</dd>
     <dt>col_id</dt>
     <dd>TEXT. ID for each column.</dd>
     <dt>value</dt>
     <dd>TEXT. Non-zero values of the sparse matrix.</dd>
     <dt>row_dim</dt>
     <dd>INTEGER. Row dimension of sparse matrix.</dd>
     <dt>col_dim</dt>
     <dd>INTEGER. Col dimension of sparse matrix.</dd>
     <dt>k</dt>
     <dd>INTEGER. Number of singular vectors to compute.</dd>
     <dt>n_iterations (optional)</dt>
     <dd>INTEGER. Number of iterations to run.</dd>
     <dt>result_summary_table (optional)</dt>
     <dd>TEXT. Table name to store result summary.</dd>
 </dl>

 <hr>
 <b>Block matrices</b>

 <pre class="syntax">
 svd_block( source_table,
            output_table_prefix,
            k,
            n_iterations,
            result_summary_table
          );
 </pre>
 \b Arguments
 <dl class="arglist">
     <dt>source_table</dt>
     <dd>TEXT. Source table name (block matrix).</dd>
     <dt>output_table_prefix</dt>
     <dd>TEXT. Prefix for output tables. See <a href="#output">Output Tables</a>.</dd>
     <dt>k</dt>
     <dd>INTEGER. Number of singular vectors to compute.</dd>
     <dt>n_iterations (optional)</dt>
     <dd>INTEGER. Number of iterations to run.</dd>
     <dt>result_summary_table (optional)</dt>
     <dd>TEXT. Table name to store result summary.</dd>
 </dl>

 @anchor output
 @par Output Tables

 Output for eigen vectors/values is in the following three tables:
    - Left singular matrix: Table named \<output_table_prefix\>_left (e.g. ‘netflix_u’)
    - Right singular matrix: Table named \<output_table_prefix\>_right (e.g. ‘netflix_v’)
    - Singular values: Table named \<output_table_prefix\>_s (e.g. ‘netflix_s’)

 The singular vector tables are of the format:
 <table class="output">
     <tr>
         <th>row_id</th>
         <td>INTEGER. The ID corresponding to each eigen value (in decreasing order).</td>
     </tr>
     <tr>
         <th>row_vec</th>
         <td>FLOAT8[]. Singular vector elements for this row_id. Each array is of size k.</td>
     </tr>
 </table>

 The singular values table is in a sparse table format, since only the diagonal
 elements of the matrix are non-zero:
 <table class="output">
     <tr>
         <th>row_id</th>
         <td>INTEGER. \e i for \e ith eigen value.</td>
     </tr>
     <tr>
         <th>col_id</th>
         <td>INTEGER. \e i for \e ith eigen value (same as row_id).</td>
     </tr>
     <tr>
         <th>value</th>
         <td>FLOAT8. Eigen Value.</td>
     </tr>
 </table>
 All \c row_id (and \c col_id) in the above tables start from 0.


 The result summary table has the following columns:
 <table class="output">
     <tr>
         <th>rows_used</th>
         <td>INTEGER. Number of rows used for SVD calculation.</td>
     </tr>
     <tr>
         <th>exec_time</th>
         <td>FLOAT8. Total time for executing SVD.</td>
     </tr>
     <tr>
         <th>iter</th>
         <td>INTEGER. Total number of iterations run.</td>
     </tr>
     <tr>
         <th>recon_error</th>
         <td>FLOAT8. Total quality score (i.e. approximation quality) for this set of orthonormal basis.</td>
     </tr>
     <tr>
         <th>relative_recon_error</th>
         <td>FLOAT8. relative quality score.</td>
     </tr>
 </table>

 In the result summary table, the reconstruction error is computed as \f$
 \sqrt{mean((X - USV^T)_{ij}^2)} \f$, where the average is over all elements of
 the matrices. The relative reconstruction error is then computed as ratio of the
 reconstruction error and \f$ \sqrt{mean(X_{ij}^2)} \f$.


 @anchor examples
 @examp

 -# View online help for the SVD function.
 <pre class="example">
 SELECT madlib.svd();
 </pre>

 -# Create an input dataset (dense matrix).
 <pre class="example">
 CREATE TABLE mat (
     row_id integer,
     row_vec double precision[]
 );
 COPY mat (row_id, row_vec) FROM stdin delimiter '|';
 1|{396,840,353,446,318,886,15,584,159,383}
 2|{691,58,899,163,159,533,604,582,269,390}
 3|{293,742,298,75,404,857,941,662,846,2}
 4|{462,532,787,265,982,306,600,608,212,885}
 5|{304,151,337,387,643,753,603,531,459,652}
 6|{327,946,368,943,7,516,272,24,591,204}
 7|{877,59,260,302,891,498,710,286,864,675}
 8|{458,959,774,376,228,354,300,669,718,565}
 9|{824,390,818,844,180,943,424,520,65,913}
 10|{882,761,398,688,761,405,125,484,222,873}
 11|{528,1,860,18,814,242,314,965,935,809}
 12|{492,220,576,289,321,261,173,1,44,241}
 13|{415,701,221,503,67,393,479,218,219,916}
 14|{350,192,211,633,53,783,30,444,176,932}
 15|{909,472,871,695,930,455,398,893,693,838}
 16|{739,651,678,577,273,935,661,47,373,618}
 \\.
 </pre>

 -# Run SVD function for a dense matrix.
 <pre class="example">
 DROP TABLE IF EXISTS svd_u;
 DROP TABLE IF EXISTS svd_v;
 DROP TABLE IF EXISTS svd_s;
 SELECT madlib.svd( 'mat',
                    'svd',
                    'row_id',
                    10
                  );
 </pre>

 -# Create a sparse matrix by running the \ref matrix_sparsify() utility function on the dense matrix.
 <pre class="example">
 DROP TABLE IF EXISTS mat_sparse;
 SELECT madlib.matrix_sparsify( 'mat',
                                'mat_sparse',
                                FALSE
                              );
 DROP TABLE IF EXISTS svd_u;
 DROP TABLE IF EXISTS svd_v;
 DROP TABLE IF EXISTS svd_s;
 </pre>

 -# Run the SVD function for a sparse matrix.
 <pre class="example">
 SELECT madlib.svd_sparse( 'mat_sparse',
                           'svd',
                           'row_id',
                           'col_id',
                           'value',
                           10
                         );
 </pre>

 @anchor background
 @par Technical Background
 In linear algebra, the singular value decomposition (SVD) is a factorization of
 a real or complex matrix, with many useful applications in signal processing and
 statistics.
 Let \f$A\f$ be a \f$m \times n\f$ matrix, where \f$m \ge n\f$. Then \f$A\f$ can be
 decomposed as follows:
 \f[
     A = U \Sigma V^T,
 \f]
 where \f$U\f$ is a \f$m \times  n\f$ orthonormal matrix,
 \f$\Sigma\f$ is a \f$n \times n\f$ diagonal matrix, and \f$V\f$ is an
 \f$n \times  n\f$ orthonormal matrix. The diagonal elements of \f$\Sigma\f$ are
 called the \e {singular values}.
 It is possible to formulate the problem of computing the singular triplets
 (\f$\sigma_i, u_i, v_i\f$) of \f$A\f$ as an eigenvalue problem involving a Hermitian
 matrix related to \f$A\f$. There are two possible ways of achieving this:
 -# With the cross product matrix, \f$A^TA\f$ and \f$AA^T\f$
 -# With the cyclic matrix
       \f[
           H(A) =
       \begin{bmatrix}
       0   & A\\
       A^* & 0
       \end{bmatrix}
       \f]
 The singular values are the nonnegative square roots of the eigenvalues of the
 cross product matrix. This approach may imply a severe loss of accuracy in the
 smallest singular values. The cyclic matrix approach is an alternative that
 avoids this problem, but at the expense of significantly increasing the cost of
 the computation.
 Computing the cross product matrix explicitly is not recommended, especially in
 the case of sparse A. Bidiagonalization was proposed by Golub and Kahan
 [citation?] as a way of tridiagonalizing the cross product matrix without
 forming it explicitly.
 Consider the following decomposition
 \f[    A = P B Q^T, \f]
 where \f$P\f$ and \f$Q\f$ are unitary matrices and \f$B\f$ is an \f$m \times n\f$
 upper bidiagonal matrix. Then the tridiagonal matrix \f$B*B\f$ is unitarily
 similar to \f$A*A\f$. Additionally, specific methods exist that compute the
 singular values of \f$B\f$ without forming \f$B*B\f$. Therefore, after computing the SVD of B,
 \f[
     B = X\Sigma Y^T,
 \f]
 it only remains to compute the SVD of the original matrix with \f$U = PX\f$ and \f$V = QY\f$.
 */

 -- -----------------------------------------------------------------------
 -- Main function for SVD (Dense format)
 -- -----------------------------------------------------------------------
 /*
 @brief Compute an singular value decomposition for a dense matrix stored in a
         database table
 */
 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.svd(
     source_table            TEXT,       -- Source table name (dense array-format matrix)
     output_table_prefix     TEXT,       -- Prefix for output tables
     row_id                  TEXT,       -- ID for each row
     k                       INTEGER,    -- Number of singular vectors to compute
     n_iterations            INTEGER,    -- Iteration number of Lanczos
     result_summary_table    TEXT        -- Table name to store result summary
 )
 RETURNS VOID AS $$
     PythonFunctionBodyOnly(`linalg', `svd')
     return svd.svd(
         schema_madlib, source_table, output_table_prefix,
         row_id, k, n_iterations, result_summary_table)
 $$ LANGUAGE plpythonu
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 -- -----------------------------------------------------------------------
 -- Main function for SVD (Block format)
 -- Each row in the input table is a triple: <row_id, col_id, block>
 -- -----------------------------------------------------------------------
 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.svd_block(
     source_table            TEXT,       -- Source table name (dense block-format matrix)
     output_table_prefix     TEXT,       -- Prefix for output tables
     k                       INTEGER,    -- Number of singular vectors to compute
     n_iterations            INTEGER,    -- Iteration number of Lanczos
     result_summary_table    TEXT        -- Table name to store result summary
 )
 RETURNS VOID AS $$
     PythonFunctionBodyOnly(`linalg', `svd')
     return svd.svd_block(
         schema_madlib, source_table, output_table_prefix, k,
         n_iterations, result_summary_table)
 $$ LANGUAGE plpythonu
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.svd_block(
     source_table            TEXT,       -- Source table name (dense block-format matrix)
     output_table_prefix     TEXT,       -- Prefix for output tables
     k                       INTEGER,    -- Number of singular vectors to compute
     n_iterations            INTEGER     -- Iteration number of Lanczos
 )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.svd_block($1, $2, $3, $4, NULL)
 $$ LANGUAGE SQL
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.svd_block(
     source_table            TEXT,       -- Source table name (dense block-format matrix)
     output_table_prefix     TEXT,       -- Prefix for output tables
     k                       INTEGER     -- Number of singular vectors to compute
 )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.svd_block($1, $2, $3, NULL, NULL)
 $$ LANGUAGE SQL
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 -- -----------------------------------------------------------------------
 -- Main Function for SVD (sparse format)
 -- -----------------------------------------------------------------------
 /*
 @brief Compute an singular value decomposition for a sparse matrix stored in a
         database table
         ('input_table', 'output_table_prefix', ’row_id', ’col_id', 'value',
           row_dim, col_dim, k, 'result_summary_table')
 */
 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.svd_sparse(
     source_table            TEXT,    -- Source table name (dense matrix)
     output_table_prefix     TEXT,    -- Prefix for output tables
     row_id                  TEXT,    -- Name of ‘row_id’ column in sparse matrix representation
     col_id                  TEXT,    -- Name of 'col_id' column in sparse matrix representation
     val_id                  TEXT,    -- Name of 'val_id' column in sparse matrix representation
     row_dim                 INTEGER, -- row dimension of sparse matrix
     col_dim                 INTEGER, -- col dimension of sparse matrix
     k                       INTEGER, -- Number of singular vectors with dominant singular values, sorted decreasingly
     n_iterations            INTEGER, -- Iteration number of Lanczos
     result_summary_table    TEXT     -- Table name to store result summary
 )
 RETURNS VOID AS $$
     PythonFunctionBodyOnly(`linalg', `svd')
     return svd.svd_sparse(
         schema_madlib, source_table, output_table_prefix,
         row_id, col_id, val_id, row_dim, col_dim, k,
         n_iterations, result_summary_table)
 $$ LANGUAGE plpythonu
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.svd_sparse_native(
     source_table            TEXT,    -- Source table name (dense matrix)
     output_table_prefix     TEXT,    -- Prefix for output tables
     row_id                  TEXT,    -- Name of ‘row_id’ column in sparse matrix representation
     col_id                  TEXT,    -- Name of 'col_id' column in sparse matrix representation
     val_id                  TEXT,    -- Name of 'val_id' column in sparse matrix representation
     row_dim                 INTEGER, -- row dimension of sparse matrix
     col_dim                 INTEGER, -- col dimension of sparse matrix
     k                       INTEGER, -- Number of singular vectors with dominant singular values,
                                      --      sorted decreasingly
     n_iterations            INTEGER, -- Iteration number of Lanczos
     result_summary_table    TEXT     -- Table name to store result summary
 )
 RETURNS VOID AS $$
     PythonFunctionBodyOnly(`linalg', `svd')
     return svd.svd_sparse_native(
         schema_madlib, source_table, output_table_prefix,
         row_id, col_id, val_id, row_dim, col_dim, k, n_iterations, result_summary_table)
 $$ LANGUAGE plpythonu
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 -- -----------------------------------------------------------------------
 -- Overloaded functions for optional parameters
 -- -----------------------------------------------------------------------
 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.svd(
     source_table            TEXT,    -- Source table name (dense matrix)
     output_table_prefix     TEXT,    -- Prefix for output tables
     row_id                  TEXT,    -- ID for each row
     k                       INTEGER  -- Number of singular vectors to compute
 )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.svd($1, $2, $3, $4, NULL, NULL)
 $$ LANGUAGE SQL
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');


 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.svd_sparse(
     source_table            TEXT,    -- Source table name (dense matrix)
     output_table_prefix     TEXT,    -- Prefix for output tables
     row_id                  TEXT,    -- Name of ‘row_id’ column in sparse matrix representation
     col_id                  TEXT,    -- Name of 'col_id' column in sparse matrix representation
     val_id                  TEXT,    -- Name of 'val_id' column in sparse matrix representation
     row_dim                 INTEGER, -- row dimension of sparse matrix
     col_dim                 INTEGER, -- col dimension of sparse matrix
     k                       INTEGER  -- Number of singular vectors with dominant singular values, sorted decreasingly
 )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.svd_sparse($1, $2, $3, $4, $5, $6, $7, $8, NULL, NULL)
 $$ LANGUAGE SQL
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.svd(
     source_table            TEXT,    -- Source table name (dense matrix)
     output_table_prefix     TEXT,    -- Prefix for output tables
     row_id                  TEXT,    -- ID for each row
     k                       INTEGER, -- Number of singular vectors to compute
     n_iterations            INTEGER  -- Iteration number of Lanczos
 )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.svd($1, $2, $3, $4, $5, NULL)
 $$ LANGUAGE SQL
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');


 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.svd_sparse(
     source_table            TEXT,    -- Source table name (dense matrix)
     output_table_prefix     TEXT,    -- Prefix for output tables
     row_id                  TEXT,    -- Name of ‘row_id’ column in sparse matrix representation
     col_id                  TEXT,    -- Name of 'col_id' column in sparse matrix representation
     val_id                  TEXT,    -- Name of 'val_id' column in sparse matrix representation
     row_dim                 INTEGER, -- row dimension of sparse matrix
     col_dim                 INTEGER, -- col dimension of sparse matrix
     k                       INTEGER, -- Number of singular vectors with dominant singular values, sorted decreasingly
     n_iterations            INTEGER  -- Iteration number of Lanczos
 )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.svd_sparse($1, $2, $3, $4, $5, $6, $7, $8, $9, NULL)
 $$ LANGUAGE SQL
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.svd_sparse_native(
     source_table            TEXT,    -- Source table name (dense matrix)
     output_table_prefix     TEXT,    -- Prefix for output tables
     row_id                  TEXT,    -- Name of ‘row_id’ column in sparse matrix representation
     col_id                  TEXT,    -- Name of 'col_id' column in sparse matrix representation
     val_id                  TEXT,    -- Name of 'val_id' column in sparse matrix representation
     row_dim                 INTEGER, -- row dimension of sparse matrix
     col_dim                 INTEGER, -- col dimension of sparse matrix
     k                       INTEGER  -- Number of singular vectors with dominant singular
                                      --   values, sorted decreasingly
 )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.svd_sparse_native($1, $2, $3, $4, $5, $6, $7, $8, NULL, NULL)
 $$ LANGUAGE SQL
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.svd_sparse_native(
     source_table            TEXT,    -- Source table name (dense matrix)
     output_table_prefix     TEXT,    -- Prefix for output tables
     row_id                  TEXT,    -- Name of ‘row_id’ column in sparse matrix representation
     col_id                  TEXT,    -- Name of 'col_id' column in sparse matrix representation
     val_id                  TEXT,    -- Name of 'val_id' column in sparse matrix representation
     row_dim                 INTEGER, -- row dimension of sparse matrix
     col_dim                 INTEGER, -- col dimension of sparse matrix
     k                       INTEGER, -- Number of singular vectors with dominant singular values, sorted decreasingly
     n_iterations            INTEGER  -- Iteration number of Lanczos
 )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.svd_sparse_native($1, $2, $3, $4, $5, $6, $7, $8, $9, NULL)
 $$ LANGUAGE SQL
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');;

 ---------------------------------------------------------------------
 ------------------------Internal Functions---------------------------
 ---------------------------------------------------------------------

 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.__svd_unit_vector
 (
     dim INT     -- Dimension of the vector
 )
 -- RETURNS MADLIB_SCHEMA.__svd_unit_vector_result
 RETURNS DOUBLE PRECISION[]
 AS 'MODULE_PATHNAME', 'svd_unit_vector'
 LANGUAGE C STRICT
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 DROP TYPE IF EXISTS MADLIB_SCHEMA.__svd_lanczos_result CASCADE;
 CREATE TYPE MADLIB_SCHEMA.__svd_lanczos_result AS
 (
     scalar  FLOAT8,     -- alpha/beta
     vec     FLOAT8[]    -- pvec/qvec
 );

 ---------------------------------------------------------------------
 -------Common Aggregator for Computing Lanzcos P/Q Vectors-----------
 ---------------------------------------------------------------------

 ---------------------------------------------------------------------
 ---------------------For Array-Format Matrix-------------------------
 ---------------------------------------------------------------------
 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.__svd_lanczos_sfunc
 (
     state       FLOAT8[],       -- A * q_j OR A_Trans * p_(j-1)
     row_id      INT,            -- Matrix row id
     row_array   FLOAT8[],       -- Matrix row array
     vector      FLOAT8[],       -- q_j OR p_(j-1)
     dimension   INT             -- row_dim OR col_dim
 )
 RETURNS FLOAT8[]
 AS 'MODULE_PATHNAME', 'svd_lanczos_sfunc'
 LANGUAGE C
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 ---------------------------------------------------------------------
 ---------------------For Block-Format Matrix-------------------------
 ---------------------------------------------------------------------
 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.__svd_block_lanczos_sfunc
 (
     state       FLOAT8[],       -- A * q_j OR A_Trans * p_(j-1)
     row_id      INT4,           -- Matrix block row id
     col_id      INT4,           -- Matrix block col id
     block       FLOAT8[],       -- Matrix block
     vector      FLOAT8[],       -- q_j OR p_(j-1)
     dimension   INT4            -- row_dim OR col_dim
 )
 RETURNS FLOAT8[]
 AS 'MODULE_PATHNAME', 'svd_block_lanczos_sfunc'
 LANGUAGE C
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 ---------------------------------------------------------------------
 ---------------------For Sparse-Format Matrix-------------------------
 ---------------------------------------------------------------------
 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.__svd_sparse_lanczos_sfunc
 (
     state       FLOAT8[],       -- A * q_j OR A_Trans * p_(j-1)
     row_id      INT4,           -- row id
     col_id      INT4,           -- col id
     val         FLOAT8,         -- value
     vector      FLOAT8[],       -- q_j OR p_(j-1)
     dimension   INT4            -- row_dim OR col_dim
 )
 RETURNS FLOAT8[]
 AS 'MODULE_PATHNAME', 'svd_sparse_lanczos_sfunc'
 LANGUAGE C
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.__svd_lanczos_prefunc
 (
     state1  FLOAT8[],
     state2  FLOAT8[]
 )
 RETURNS FLOAT8[]
 AS 'MODULE_PATHNAME', 'svd_lanczos_prefunc'
 LANGUAGE C STRICT
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 ---------------------------------------------------------------------
 ---------------------For Array-Format Matrix-------------------------
 ---------------------------------------------------------------------
 DROP AGGREGATE IF EXISTS
 MADLIB_SCHEMA.__svd_lanczos_agg
 (
     INT,        -- Matrix row id
     FLOAT8[],   -- Matrix row array
     FLOAT8[],   -- q_j OR p_(j-1)
     INT         -- row_dim OR col_dim
 );

 CREATE AGGREGATE
 MADLIB_SCHEMA.__svd_lanczos_agg
 (
     INT,        -- Matrix row id
     FLOAT8[],   -- Matrix row array
     FLOAT8[],   -- q_j OR p_(j-1)
     INT         -- row_dim OR col_dim
 )
 (
     stype = FLOAT8[],
     sfunc = MADLIB_SCHEMA.__svd_lanczos_sfunc
     m4_ifdef(
         `__POSTGRESQL__', `',
         `, prefunc = MADLIB_SCHEMA.__svd_lanczos_prefunc'
     )
 );

 ---------------------------------------------------------------------
 ---------------------For Block-Format Matrix-------------------------
 ---------------------------------------------------------------------
 DROP AGGREGATE IF EXISTS
 MADLIB_SCHEMA.__svd_block_lanczos_agg
 (
     INT4,       -- Matrix block row id
     INT4,       -- Matrix block col id
     FLOAT8[],   -- Matrix block
     FLOAT8[],   -- q_j OR p_(j-1)
     INT4        -- row_dim OR col_dim
 );

 CREATE AGGREGATE
 MADLIB_SCHEMA.__svd_block_lanczos_agg
 (
     INT,        -- Matrix block row id
     INT4,       -- Matrix block col id
     FLOAT8[],   -- Matrix row array
     FLOAT8[],   -- q_j OR p_(j-1)
     INT         -- row_dim OR col_dim
 )
 (
     stype = FLOAT8[],
     --Note that only the sfunc is different
     sfunc = MADLIB_SCHEMA.__svd_block_lanczos_sfunc
     m4_ifdef(
         `__POSTGRESQL__', `',
         `, prefunc = MADLIB_SCHEMA.__svd_lanczos_prefunc'
     )
 );

 ---------------------------------------------------------------------
 ---------------------For Sparse-Format Matrix-------------------------
 ---------------------------------------------------------------------
 DROP AGGREGATE IF EXISTS
 MADLIB_SCHEMA.__svd_sparse_lanczos_agg
 (
     INT4,       -- Row ID
     INT4,       -- Column ID
     FLOAT8,     -- Value
     FLOAT8[],   -- q_j OR p_(j-1)
     INT4        -- row_dim OR col_dim
 );

 CREATE AGGREGATE
 MADLIB_SCHEMA.__svd_sparse_lanczos_agg
 (
     INT4,       -- Row ID
     INT4,       -- Column ID
     FLOAT8,     -- Value
     FLOAT8[],   -- q_j OR p_(j-1)
     INT4        -- row_dim OR col_dim
 )
 (
     stype = FLOAT8[],
     --Note that only the sfunc is different
     sfunc = MADLIB_SCHEMA.__svd_sparse_lanczos_sfunc
     m4_ifdef(
         `__POSTGRESQL__', `',
         `, prefunc = MADLIB_SCHEMA.__svd_lanczos_prefunc'
     )
 );

 ---------------------------------------------------------------------
 --------Postproc Function for Computing Lanzcos P/V Vectors---------
 ---------------------------------------------------------------------
 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.__svd_lanczos_pvec
 (
     vector  FLOAT8[],   -- Partial result from the aggregator
     pvec    FLOAT8[],   -- Previous P vector
     beta    FLOAT8      -- Previous beta
 )
 RETURNS MADLIB_SCHEMA.__svd_lanczos_result
 AS 'MODULE_PATHNAME', 'svd_lanczos_pvec'
 LANGUAGE C
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.__svd_lanczos_qvec
 (
     vector  FLOAT8[],   -- Partial result from the aggregator
     qvec    FLOAT8[],   -- Previous P vector
     alpha   FLOAT8      -- Previous beta
 )
 RETURNS FLOAT8[]
 AS 'MODULE_PATHNAME', 'svd_lanczos_qvec'
 LANGUAGE C
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 ---------------------------------------------------------------------
 -----------------Gram-Schmidt Orthogonilization----------------------
 ---------------------------------------------------------------------
 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.__svd_gram_schmidt_orthogonalize_sfunc
 (
     state                   FLOAT8[],
     vec_unorthogonalized    FLOAT8[],
     vec_orthogonalized      FLOAT8[]
 )
 RETURNS FLOAT8[]
 AS 'MODULE_PATHNAME', 'svd_gram_schmidt_orthogonalize_sfunc'
 LANGUAGE C
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.__svd_gram_schmidt_orthogonalize_prefunc
 (
     state1  FLOAT8[],
     state2  FLOAT8[]
 )
 RETURNS FLOAT8[]
 AS 'MODULE_PATHNAME', 'svd_gram_schmidt_orthogonalize_prefunc'
 LANGUAGE C STRICT
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 -- This function will also do the normalization
 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.__svd_gram_schmidt_orthogonalize_ffunc
 (
     state                   FLOAT8[]
 )
 RETURNS MADLIB_SCHEMA.__svd_lanczos_result
 AS 'MODULE_PATHNAME', 'svd_gram_schmidt_orthogonalize_ffunc'
 LANGUAGE C STRICT
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 DROP AGGREGATE IF EXISTS
 MADLIB_SCHEMA.__svd_gram_schmidt_orthogonalize_agg
 (
     FLOAT8[],   -- Unorthogonalized vector
     FLOAT8[]    -- Orthogonalized vector
 );

 CREATE AGGREGATE
 MADLIB_SCHEMA.__svd_gram_schmidt_orthogonalize_agg
 (
     FLOAT8[],   -- Unorthogonalized vector
     FLOAT8[]    -- Orthogonalized vector
 )
 (
     stype = FLOAT8[],
     sfunc = MADLIB_SCHEMA.__svd_gram_schmidt_orthogonalize_sfunc,
     finalfunc = MADLIB_SCHEMA.__svd_gram_schmidt_orthogonalize_ffunc
     m4_ifdef(
         `__POSTGRESQL__', `',
         `, prefunc = MADLIB_SCHEMA.__svd_gram_schmidt_orthogonalize_prefunc'
     )
 );

 ---------------------------------------------------------------------
 --------------------Lanczos Bidiagonalization------------------------
 ---------------------------------------------------------------------
 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.__svd_lanczos_bidiagonalize
 (
     source_table        TEXT,
     output_table_prefix TEXT,
     n_iterations        INT,
     k                   INT,
     is_block            BOOLEAN
 )
 RETURNS VOID AS $$
     PythonFunctionBodyOnly(`linalg', `svd')
     return svd.lanczos_bidiagonalize(
         schema_madlib, source_table,
         output_table_prefix, n_iterations, k, is_block)
 $$ LANGUAGE plpythonu
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.__svd_lanczos_bidiagonalize_sparse
 (
     source_table        TEXT,
     row_id              TEXT,
     col_id              TEXT,
     val                 TEXT,
     output_table_prefix TEXT,
     n_iterations        INT,
     k                   INT
 )
 RETURNS VOID AS $$
     PythonFunctionBodyOnly(`linalg', `svd')
     return svd.lanczos_bidiagonalize_sparse(
         schema_madlib, source_table, row_id, col_id,
         val, output_table_prefix, n_iterations, k)
 $$ LANGUAGE plpythonu
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 ---------------------------------------------------------------------
 -------------------- SVD of Bidiagonal matrix ------------------------
 ---------------------------------------------------------------------
 DROP TYPE IF EXISTS MADLIB_SCHEMA.__svd_bidiagonal_matrix_result CASCADE;
 CREATE TYPE MADLIB_SCHEMA.__svd_bidiagonal_matrix_result AS
 (
     left_matrix     FLOAT8[][],
     right_matrix    FLOAT8[][],
     singular_values FLOAT8[]
 );

 ------------------------------------------------------------------------------
 --The bidiagonal matrix is represented as a triple: <row_ids, col_ids, vals>
 --where:
 --  row_ids is an array of integers representing row_ids of non-zero elements
 --  col_ids is an array of integers representing col_ids of non-zero elements
 --  vals is an array of doubles representing values of non-zero elements
 --Note that we don't need the aggregator to convert the sparse bidiagonal
 --matrix into a dense matrix
 ------------------------------------------------------------------------------
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__svd_decompose_bidiag(
     row_ids INT[],
     col_ids INT[],
     vals    FLOAT8[]
 )
 RETURNS MADLIB_SCHEMA.__svd_bidiagonal_matrix_result
 AS 'MODULE_PATHNAME', 'svd_decompose_bidiag'
 LANGUAGE C STRICT
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 -----------------------------------------------------------------------
 -- Special Vector-Matrix Multiplication Functions
 -----------------------------------------------------------------------
 /**
  * Multiplication of a row vector with an in-memory matrix
  * vector: 1 x l
  * matrix: l x l
  * k: Sub-matrix of the size l x k
  */
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__svd_vec_mult_matrix(
     vector  FLOAT8[],
     matrix  FLOAT8[][],
     k       INT4
 )
 RETURNS FLOAT8[]
 AS 'MODULE_PATHNAME', 'svd_vec_mult_matrix'
 LANGUAGE C STRICT
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 DROP TYPE IF EXISTS MADLIB_SCHEMA.__svd_vec_mat_mult_result CASCADE;
 CREATE TYPE MADLIB_SCHEMA.__svd_vec_mat_mult_result AS
 (
     row_id  INT4,
     row_vec FLOAT8[]
 );

 --Multiplication of a column vector with an in-memory matrix
 --Return a set of m row vector of the size 1 * k
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__svd_vec_trans_mult_matrix_internal(
     vector  FLOAT8[],   -- m * l row vector
     matrix  FLOAT8[][], -- l * l in-memory matrix
     col_id  INT4,       -- Column ID
     k       INT4        -- Specify the size of submatrix l * k
 )
 RETURNS SETOF MADLIB_SCHEMA.__svd_vec_mat_mult_result
 AS 'MODULE_PATHNAME', 'svd_vec_trans_mult_matrix'
 LANGUAGE C STRICT
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

 --Multiplication of P/Q vectors with Left/Right Singular Matrix of B
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__svd_vec_trans_mult_matrix(
     vec_table   TEXT,       -- P/Q column vectors
     mat_table   TEXT,       -- (svd_output).left_matrix/right_matrix
     k           INT4,       -- Number of singular values
     is_left     BOOLEAN,    -- Left matrix?
     res_table   TEXT        -- Result
 )
 RETURNS VOID AS $$
     PythonFunctionBodyOnly(`linalg', `svd')
     return svd.svd_vec_trans_mult_matrix(
         schema_madlib, vec_table, mat_table, k, res_table, is_left)
 $$ LANGUAGE plpythonu
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 -----------------------------------------------------------------------
 -- Help functions
 -----------------------------------------------------------------------
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svd(
     input_message            TEXT
 )
 RETURNS TEXT AS $$
 PythonFunctionBodyOnly(`linalg', `svd')
     return svd.svd_help_message(schema_madlib, input_message)
 $$ LANGUAGE plpythonu
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');


 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svd()
 RETURNS TEXT AS $$
 PythonFunctionBodyOnly(`linalg', `svd')
     return svd.svd_help_message(schema_madlib, None)
 $$ LANGUAGE plpythonu
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');