| /* ----------------------------------------------------------------------- *//** |
| * |
| * @file dense_linear_sytems.sql_in |
| * |
| * @brief SQL functions for linear systems |
| * @date January 2011 |
| * |
| * @sa Computes the solution of a consistent linear system |
| * |
| *//* ----------------------------------------------------------------------- */ |
| |
| m4_include(`SQLCommon.m4') |
| |
| |
| /** |
| @addtogroup grp_dense_linear_solver |
| |
| |
| <div class ="toc"><b>Contents</b> |
| <ul> |
| <li class="level1"><a href="#dls_about">About</a></li> |
| <li class="level1"><a href="#dls_online_help">Online Help</a></li> |
| <li class="level1"><a href="#dls_function">Function Syntax</a></li> |
| <li class="level1"><a href="#dls_args">Arguments</a></li> |
| <li class="level1"><a href="#dls_opt_params">Optimizer Parameters</a></li> |
| <li class="level1"><a href="#dls_output">Output Tables</a></li> |
| <li class="level1"><a href="#dls_examples">Examples</a></li> |
| </ul> |
| </div> |
| |
| @anchor dls_about |
| @about |
| |
| The linear systems module implements solution methods for systems of consistent |
| linear equations. Systems of linear equations take the form: |
| \f[ |
| Ax = b |
| \f] |
| |
| where \f$x \in \mathbb{R}^{n}\f$, \f$A \in \mathbb{R}^{m \times n} \f$ and \f$b \in \mathbb{R}^{m}\f$. |
| We assume that there are no rows of \f$A\f$ where all elements are zero. |
| The algorithms implemented in this module can handle large dense |
| linear systems. Currently, the algorithms implemented in this module |
| solve the linear system by a direct decomposition. Hence, these methods are |
| known as <em>direct method</em>. |
| |
| @anchor dls_online_help |
| @par Online Help |
| |
| View short help messages using the following statements: |
| @verbatim |
| -- Summary of dense linear systems |
| SELECT madlib.linear_solver_dense(); |
| |
| -- Function syntax and output table format |
| SELECT madlib.linear_solver_dense('usage'); |
| |
| -- Syntax for direct methods |
| SELECT madlib.linear_solver_dense('direct'); |
| @endverbatim |
| |
| @anchor dls_function |
| @par Function Syntax |
| |
| <pre> |
| SELECT linear_solver_dense(tbl_source, tbl_result, row_id, LHS, |
| RHS, grouping_col := NULL, optimizer := 'direct', |
| optimizer_params := 'algorithm = householderqr'); |
| </pre> |
| |
| @anchor dls_args |
| @par Arguments |
| |
| <DL class="arglist"> |
| <DT>tbl_source</DT> |
| <DD>Text value. The name of the table containing the training data. |
| The input data is expected to be of the following form: |
| <pre>{TABLE|VIEW} <em>sourceName</em> ( |
| ... |
| <em>row_id</em> FLOAT8, |
| <em>left_hand_side</em> FLOAT8[], |
| <em>right_hand_side</em> FLOAT8, |
| ... |
| )</pre> |
| |
| Here, each row represents a single equation using. The <em> right_hand_side |
| </em> refers |
| to the right hand side of the equations while the <em> left_hand_side </em> |
| refers to the multipliers on the variables on the left hand side of the same |
| equations. |
| </DD> |
| |
| <DT>tbl_result</DT> |
| <DD>Text value. The name of the table where the output is saved.</DD> |
| |
| <DT>row_id</DT> |
| <DD>Text value. The name of the column storing the 'row id' of the equations.</DD> |
| |
| \note For a system with N equations, the row_id's must be a continuous |
| range of integers from \f$ 0 \ldots n-1 \f$. |
| |
| <DT>LHS</DT> |
| <DD>Text value. The name of the column storing the 'left hand side' of the |
| equations stored as an array.</DD> |
| |
| <DT>RHS</DT> |
| <DD>Text value. The name of the column storing the 'right hand side' of the |
| equations.</DD> |
| |
| <DT>grouping_col (optional) </DT> |
| <DD>Text value. Group by columns. Default: NULL.</DD> |
| \note The grouing columns is a placeholder in MADlib V1.1 |
| |
| <DT>optimizer (optional) </DT> |
| <DD>Text value. Type of optimizer. Default: 'direct'.</DD> |
| |
| <DT>optimizer_params (optional) </DT> |
| <DD>Text value. Optimizer specific parameters. Default: NULL.</DD> |
| </DL> |
| |
| @anchor dls_opt_params |
| @par Optimizer Parameters |
| |
| For each optimizer, there are specific parameters that can be tuned |
| for better performance. |
| \par |
| <DL class="arglist"> |
| <DT>algorithm (default: householderqr)</dT> |
| <DD> |
| |
| There are several algorithms that can be classified as 'direct' methods |
| of solving linear systems. MADlib dense linear system solvers provide |
| various algorithmic options for users. |
| |
| The following table provides a guideline on the choice of algorithm based |
| on conditions on the A matrix, speed of the algorithms and numerical stability. |
| |
| Algorithm | Contitions on A | Speed | Accuracy |
| ---------------------------------------------------------- |
| householderqr | None | ++ | + |
| partialpivlu | Invertable | ++ | + |
| fullpivlu | None | - | +++ |
| colpivhouseholderqr | None | + | ++ |
| fullpivhouseholderqr | None | - | +++ |
| llt | Pos. Definite | +++ | + |
| ldlt | Pos. or Neg Def | +++ | ++ |
| |
| For speed '++' is faster than '+', which is faster than '-'. |
| For accuracy '+++' is better than '++'. |
| |
| More details about the individual algorithms can be found on the <a href="http://eigen.tuxfamily.org/dox-devel/group__TutorialLinearAlgebra.html"> Eigen documentation</a>. Eigen is an open source library for linear algebra. |
| |
| |
| </DD> |
| </DL> |
| |
| @anchor dls_output |
| @par Output statistics |
| Output is stored in the <em>tbl_result</em> table. |
| <DL class="arglist"> |
| <DT>solution</dT> |
| <DD> |
| The solution is an array (of double precision) with the variables in the same |
| order as that provided as input in the 'left_hand_side' column name of the |
| 'source_table' |
| </DD> |
| |
| <DT>residual_norm</dT> |
| <DD> |
| Computes the scaled residual norm, defined as \f$ \frac{|Ax - b|}{|b|} \f$. |
| This value is an indication of the accuracy of the solution. |
| </DD> |
| |
| <DT>iters</dT> |
| <DD>` |
| The number of iterations required by the algorithm (only applicable for |
| iterative algorithms). The output is NULL for |
| 'direct' methods. |
| </DD> |
| |
| </DL> |
| |
| |
| |
| |
| |
| @anchor dls_examples |
| @examp |
| |
| -# Create the sample data set: |
| \verbatim |
| sql> CREATE TABLE linear_systems_test_data (id INTEGER NOT NULL, |
| lhs DOUBLE PRECISION[], |
| rhs DOUBLE PRECISION); |
| sql> INSERT INTO linear_systems_test_data(id, lhs, rhs) VaLUES |
| (0, ARRAY[1,0,0], 20), |
| (1, ARRAY[0,1,0], 15), |
| (2, ARRAY[0,0,1], 20); |
| \endverbatim |
| |
| -# Solve the linear systems with default parameters |
| \verbatim |
| sql> SELECT madlib.linear_solver_dense('linear_systems_test_data', |
| 'output_table', |
| 'id', |
| 'lhs', |
| 'rhs'); |
| \endverbatim |
| |
| -# Obtain the output from the output table |
| \verbatim |
| sql> SELECT * FROM output_table; |
| --------------------+------------------------------------- |
| solution | {20,15,20} |
| residual_norm | 0 |
| iters | NULL |
| \endverbatim |
| |
| |
| -# Chose a different algorithm than the default algorithm |
| \verbatim |
| drop table if exists result_table; |
| select madlib.linear_solver_dense( |
| 'linear_systems_test_data', |
| 'result_table', |
| 'id', |
| 'lhs', |
| 'rhs', |
| NULL, |
| 'direct', |
| 'algorithm=llt' |
| ); |
| \endverbatim |
| |
| |
| @sa File dense_linear_sytems.sql_in documenting the SQL functions. |
| |
| @internal |
| @sa Namespace \ref madlib::modules::linear_systems documenting the implementation in C++ |
| @endinternal |
| */ |
| |
| ------------------ Linear Systems ------------------------------ |
| |
| CREATE TYPE MADLIB_SCHEMA.dense_linear_solver_result AS ( |
| solution DOUBLE PRECISION[], |
| residual_norm DOUBLE PRECISION, |
| iters INTEGER |
| ); |
| |
| CREATE TYPE MADLIB_SCHEMA.residual_norm_result AS ( |
| residual_norm DOUBLE PRECISION |
| ); |
| |
| |
| ------------------------ Compute the residuals ------------------------------ |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dense_residual_norm_transition( |
| state MADLIB_SCHEMA.bytea8, |
| a DOUBLE PRECISION[], |
| b DOUBLE PRECISION, |
| x DOUBLE PRECISION[]) |
| RETURNS MADLIB_SCHEMA.bytea8 |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C IMMUTABLE STRICT; |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dense_residual_norm_merge_states( |
| state1 MADLIB_SCHEMA.bytea8, |
| state2 MADLIB_SCHEMA.bytea8) |
| RETURNS MADLIB_SCHEMA.bytea8 |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C IMMUTABLE STRICT; |
| |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dense_residual_norm_final( |
| state MADLIB_SCHEMA.bytea8) |
| RETURNS MADLIB_SCHEMA.residual_norm_result |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C IMMUTABLE STRICT; |
| |
| /** |
| * @brief Compute the residual after solving the dense linear systems |
| * |
| * @param left_hand_side Column containing the left hand side of the system |
| * @param right_hand_side Column containing the right hand side of the system |
| * @param solution Solution of the linear system |
| * |
| * |
| * @return residual_norm FLOAT8: |
| * |
| * @usage |
| * - Get all the diagnostic statistics:\n |
| * |
| * <pre> SELECT dense_residual_norm(<em>row_id</em>, |
| * <em>left_hand_side</em>, |
| * <em> right_hand_side </em>, |
| * <em> solution </em>) |
| * FROM <em>dataTable</em>; |
| * </pre> |
| */ |
| |
| CREATE AGGREGATE MADLIB_SCHEMA.dense_residual_norm( |
| /*+ "left_hand_side" */ DOUBLE PRECISION[], |
| /*+ "right_hand_side" */ DOUBLE PRECISION, |
| /*+ "solution" */ DOUBLE PRECISION[])( |
| STYPE=MADLIB_SCHEMA.bytea8, |
| SFUNC=MADLIB_SCHEMA.dense_residual_norm_transition, |
| m4_ifdef(`__GREENPLUM__',`PREFUNC=MADLIB_SCHEMA.dense_residual_norm_merge_states,') |
| FINALFUNC=MADLIB_SCHEMA.dense_residual_norm_final, |
| INITCOND='' |
| ); |
| |
| ------------------ Direct Method ------------------------------ |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dense_direct_linear_system_transition( |
| state DOUBLE PRECISION[], |
| row_id INTEGER, |
| a DOUBLE PRECISION[], |
| b DOUBLE PRECISION, |
| num_rows INTEGER, |
| algorithm INTEGER) |
| RETURNS DOUBLE PRECISION[] |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C IMMUTABLE STRICT; |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dense_direct_linear_system_merge_states( |
| state1 DOUBLE PRECISION[], |
| state2 DOUBLE PRECISION[]) |
| RETURNS DOUBLE PRECISION[] |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C IMMUTABLE STRICT; |
| |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dense_direct_linear_system_final( |
| state DOUBLE PRECISION[]) |
| RETURNS MADLIB_SCHEMA.dense_linear_solver_result |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C IMMUTABLE STRICT; |
| |
| |
| /** |
| * @brief Solve a system of linear equations using the direct method |
| * |
| * @param row_id Column containing the row_id |
| * @param left_hand_side Column containing the left hand side of the system |
| * @param right_hand_side Column containing the right hand side of the system |
| * @param numEquations Number of equations |
| * @param algorithm Algorithm used for the dense linear solver |
| * |
| * |
| * @return A composite value: |
| * - <tt>solution FLOAT8[] </tt> - Array of marginal effects |
| * - <tt>residual_norm FLOAT8</tt> - Norm of the residual |
| * - <tt>iters INTEGER</tt> - Iterations taken |
| * |
| * @usage |
| * - Get all the diagnostic statistics:\n |
| * |
| * <pre> SELECT linear_system_dense(<em>row_id</em>, |
| * <em>left_hand_side</em>, |
| * <em> right_hand_side </em>, |
| * <em> numEquations </em>) |
| * FROM <em>dataTable</em>; |
| * </pre> |
| */ |
| |
| CREATE AGGREGATE MADLIB_SCHEMA.dense_direct_linear_system( |
| /*+ "row_id" */ INTEGER, |
| /*+ "left_hand_side" */ DOUBLE PRECISION[], |
| /*+ "right_hand_side" */ DOUBLE PRECISION, |
| /*+ "numEquations" */ INTEGER, |
| /*+ "algorithm" */ INTEGER)( |
| STYPE=DOUBLE PRECISION[], |
| SFUNC=MADLIB_SCHEMA.dense_direct_linear_system_transition, |
| m4_ifdef(`__GREENPLUM__',`PREFUNC=MADLIB_SCHEMA.dense_direct_linear_system_merge_states,') |
| FINALFUNC=MADLIB_SCHEMA.dense_direct_linear_system_final, |
| INITCOND='{0,0,0,0,0,0}' |
| ); |
| |
| |
| --------------------------- Interface ---------------------------------- |
| |
| /** |
| * @brief Help function, to print out the supported families |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_solver_dense( |
| input_string VARCHAR |
| ) |
| RETURNS VARCHAR AS $$ |
| PythonFunction(linear_systems, dense_linear_systems, linear_solver_dense_help) |
| $$ LANGUAGE plpythonu; |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_solver_dense() |
| RETURNS VARCHAR AS $$ |
| BEGIN |
| RETURN MADLIB_SCHEMA.linear_solver_dense(''); |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| |
| /** |
| @brief A wrapper function for the various marginal linear_systemsion analyzes. |
| * |
| * @param source_table String identifying the input table |
| * @param out_table String identifying the output table to be created |
| * @param row_id Column containing the row_id |
| * @param left_hand_side Column containing the left hand side of the system |
| * @param right_hand_side Column containing the right hand side of the system |
| * @param grouping_cols Columns to group by |
| * @param optimzer Optimizer to be used |
| * @param optimzer_options Optimal parameters for the algorithms |
| * |
| * |
| * @return void |
| * |
| * @usage |
| * For function summary information. Run |
| * sql> select linear_solver_dense('help'); |
| * OR |
| * sql> select linear_solver_dense(); |
| * OR |
| * sql> select linear_solver_dense('?'); |
| * For function usage information. Run |
| * sql> select linear_solver_dense('usage'); |
| * |
| */ |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_solver_dense( |
| source_table VARCHAR -- name of input table |
| , out_table VARCHAR -- name of output table |
| , row_id VARCHAR -- name of the column containing row_id |
| , left_hand_side VARCHAR -- name of columns with lhs |
| , right_hand_side VARCHAR -- name of columns with rhs |
| , grouping_cols VARCHAR -- name of columns to group by |
| , optimizer VARCHAR -- Name of the optimizer |
| , optimizer_options VARCHAR -- Optimal parameters of the optimizer |
| ) |
| RETURNS VOID AS $$ |
| PythonFunction(linear_systems, dense_linear_systems, linear_solver_dense) |
| $$ LANGUAGE plpythonu; |
| |
| |
| |
| -- Default Variable calls for linear_solver_dense |
| ------------------------------------------------------------------------------ |
| |
| /** |
| * @brief Marginal effects with default variables |
| **/ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_solver_dense( |
| source_table VARCHAR -- name of input table |
| , out_table VARCHAR -- name of output table |
| , row_id VARCHAR -- name of the column containing row_id |
| , left_hand_side VARCHAR -- name of columns with lhs |
| , right_hand_side VARCHAR -- name of columns with rhs |
| ) |
| RETURNS VOID AS $$ |
| BEGIN |
| PERFORM MADLIB_SCHEMA.linear_solver_dense( |
| source_table, |
| out_table, |
| row_id, |
| left_hand_side, |
| right_hand_side, |
| NULL, |
| 'direct', |
| NULL); |
| |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| |
| /** |
| * @brief Marginal effects with default variables |
| **/ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_solver_dense( |
| source_table VARCHAR -- name of input table |
| , out_table VARCHAR -- name of output table |
| , row_id VARCHAR -- name of the column containing row_id |
| , left_hand_side VARCHAR -- name of columns with lhs |
| , right_hand_side VARCHAR -- name of columns with rhs |
| , grouping_cols VARCHAR -- name of columns to group by |
| ) |
| RETURNS VOID AS $$ |
| BEGIN |
| PERFORM MADLIB_SCHEMA.linear_solver_dense( |
| source_table, |
| out_table, |
| row_id, |
| left_hand_side, |
| right_hand_side, |
| grouping_cols, |
| 'direct', |
| NULL); |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |
| |
| /** |
| * @brief Marginal effects with default variables |
| **/ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_solver_dense( |
| source_table VARCHAR -- name of input table |
| , out_table VARCHAR -- name of output table |
| , row_id VARCHAR -- name of the column containing row_id |
| , left_hand_side VARCHAR -- name of columns with lhs |
| , right_hand_side VARCHAR -- name of columns with rhs |
| , grouping_cols VARCHAR -- name of columns to group by |
| , optimizer VARCHAR -- Name of the optimizer |
| ) |
| RETURNS VOID AS $$ |
| BEGIN |
| PERFORM MADLIB_SCHEMA.linear_solver_dense( |
| source_table, |
| out_table, |
| row_id, |
| left_hand_side, |
| right_hand_side, |
| grouping_cols, |
| optimizer, |
| NULL); |
| END; |
| $$ LANGUAGE plpgsql VOLATILE; |