blob: 15886f704cc9ece4156f609b752e96da05e3bc72 [file] [log] [blame]
/* ----------------------------------------------------------------------- */
/**
*
* @file dense_linear_systems.sql_in
*
* @brief SQL functions for linear systems
* @date July 2013
*
* @sa Computes the solution of a consistent linear system, for more details
* see the module description at \ref grp_dense_linear_solver
*
*/
/* ----------------------------------------------------------------------- */
m4_include(`SQLCommon.m4')
/**
@addtogroup grp_dense_linear_solver
<div class ="toc"><b>Contents</b>
<ul>
<li class="level1"><a href="#dls_usage"> Solution Function</a></li>
<li class="level1"><a href="#dls_opt_params"> Optimizer Parameters</a></li>
<li class="level1"><a href="#dls_examples"> Examples</a></li>
<li class="level1"><a href="#related"> Related Topics</a></li>
</ul>
</div>
@brief Implements solution methods for large dense linear systems. Currently,
restricted to problems that fit in memory.
The linear systems module implements solution methods for systems of consistent
linear equations. Systems of linear equations take the form:
\f[
Ax = b
\f]
where \f$x \in \mathbb{R}^{n}\f$, \f$A \in \mathbb{R}^{m \times n} \f$ and \f$b \in \mathbb{R}^{m}\f$.
We assume that there are no rows of \f$A\f$ where all elements are zero.
The algorithms implemented in this module can handle large dense
linear systems. Currently, the algorithms implemented in this module
solve the linear system by a direct decomposition. Hence, these methods are
known as <em>direct method</em>.
@anchor dls_usage
@par Solution Function
<pre class="syntax">
linear_solver_dense( tbl_source,
tbl_result,
row_id,
LHS,
RHS,
grouping_col,
optimizer,
optimizer_params
)
</pre>
\b Arguments
<DL class="arglist">
<DT>tbl_source</DT>
<DD>TEXT. The name of the table containing the training data.
The input data is expected to be of the following form:
<pre>{TABLE|VIEW} <em>sourceName</em> (
...
<em>row_id</em> FLOAT8,
<em>left_hand_side</em> FLOAT8[],
<em>right_hand_side</em> FLOAT8,
...
)</pre>
Each row represents a single equation. The <em>right_hand_side</em>
column refers to the right hand side of the equations while the
<em>left_hand_side</em> column refers to the multipliers on the variables on
the left hand side of the same equations.</DD>
<DT>tbl_result</DT>
<DD>TEXT. The name of the table where the output is saved. The output is stored in the table named by the <em>tbl_result</em> argument. It contains the following columns:
<table class="output">
<tr>
<th>solution</th>
<td> FLOAT8[]. The solution variables in the same
order as that provided as input in the 'left_hand_side' column name of the
<em>source_table</em>
</td>
</tr><tr>
<th>residual_norm</th>
<td> FLOAT8. The scaled residual norm, defined as \f$ \frac{|Ax - b|}{|b|} \f$.
This value is an indication of the accuracy of the solution.
</td>
</tr><tr>
<th>iters</th>
<td>INTEGER. Number of iterations required by the algorithm (only applicable for
iterative algorithms). The output is NULL for 'direct' methods.
</td>
</tr>
</table>
</DD>
<DT>row_id</DT>
<DD>TEXT. The name of the column storing the 'row id' of the equations.
For a system with N equations, the row_id's must be a continuous range
of integers from \f$ 0 \ldots n-1 \f$.
</dd>
<DT>LHS</DT>
<DD>TEXT. The name of the column storing the 'left hand side' of the
equations, stored as an array.</DD>
<DT>RHS</DT>
<DD>TEXT. The name of the column storing the 'right hand side' of the
equations.</DD>
<DT>grouping_cols (optional) </DT>
<DD>TEXT, default: NULL. Group by column names. <em>Not currently implemented. Any non-NULL value is ignored.</em></DD>
<DT>optimizer (optional) </DT>
<DD>TEXT, default: 'direct'. The type of optimizer.</DD>
<DT>optimizer_params (optional) </DT>
<DD>TEXT, default: NULL. Optimizer specific parameters.</DD>
</DL>
@anchor dls_opt_params
@par Optimizer Parameters
For each optimizer, there are specific parameters that can be tuned
for better performance.
<DL class="arglist">
<DT>algorithm (default: householderqr)</dT>
<DD>
There are several algorithms that can be classified as 'direct' methods
of solving linear systems. MADlib dense linear system solvers provide
various algorithmic options for users.
The following table provides a guideline on the choice of algorithm based
on conditions on the A matrix, speed of the algorithms and numerical stability.
Algorithm | Conditions on A | Speed | Accuracy
----------------------------------------------------------
householderqr | None | ++ | +
partialpivlu | Invertable | ++ | +
fullpivlu | None | - | +++
colpivhouseholderqr | None | + | ++
fullpivhouseholderqr | None | - | +++
llt | Pos. Definite | +++ | +
ldlt | Pos. or Neg Def | +++ | ++
For speed '++' is faster than '+', which is faster than '-'.
For accuracy '+++' is better than '++'.
More details about the individual algorithms can be found in the <a href="http://eigen.tuxfamily.org/dox-devel/group__TutorialLinearAlgebra.html"> Eigen documentation</a>. Eigen is an open source library for linear algebra.
</DD>
</DL>
@anchor dls_examples
@examp
-# View online help for the linear systems solver function.
<pre class="example">
SELECT madlib.linear_solver_dense();
</pre>
-# Create the sample data set.
<pre class="example">
CREATE TABLE linear_systems_test_data( id INTEGER NOT NULL,
lhs DOUBLE PRECISION[],
rhs DOUBLE PRECISION
);
INSERT INTO linear_systems_test_data(id, lhs, rhs)
VALUES
(0, ARRAY[1,0,0], 20),
(1, ARRAY[0,1,0], 15),
(2, ARRAY[0,0,1], 20);
</pre>
-# Solve the linear systems with default parameters.
<pre class="example">
SELECT madlib.linear_solver_dense( 'linear_systems_test_data',
'output_table',
'id',
'lhs',
'rhs'
);
</pre>
-# Obtain the output from the output table.
<pre class="example">
\\x on
SELECT * FROM output_table;
</pre>
Result:
<pre class="result">
--------------------+-------------------------------------
solution | {20,15,20}
residual_norm | 0
iters | NULL
</pre>
-# Choose an algorithm different than the default.
<pre class="example">
DROP TABLE IF EXISTS result_table;
SELECT madlib.linear_solver_dense( 'linear_systems_test_data',
'result_table',
'id',
'lhs',
'rhs',
NULL,
'direct',
'algorithm=llt'
);
</pre>
@anchor related
@par Related Topics
File dense_linear_systems.sql_in documenting the SQL functions
@internal
@sa Namespace \ref madlib::modules::linear_systems documenting the implementation in C++
@endinternal
*/
------------------ Linear Systems ------------------------------
DROP TYPE IF EXISTS MADLIB_SCHEMA.dense_linear_solver_result CASCADE;
CREATE TYPE MADLIB_SCHEMA.dense_linear_solver_result AS (
solution DOUBLE PRECISION[],
residual_norm DOUBLE PRECISION,
iters INTEGER
);
DROP TYPE IF EXISTS MADLIB_SCHEMA.residual_norm_result CASCADE;
CREATE TYPE MADLIB_SCHEMA.residual_norm_result AS (
residual_norm DOUBLE PRECISION
);
------------------------ Compute the residuals ------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dense_residual_norm_transition(
state MADLIB_SCHEMA.bytea8,
a DOUBLE PRECISION[],
b DOUBLE PRECISION,
x DOUBLE PRECISION[])
RETURNS MADLIB_SCHEMA.bytea8
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dense_residual_norm_merge_states(
state1 MADLIB_SCHEMA.bytea8,
state2 MADLIB_SCHEMA.bytea8)
RETURNS MADLIB_SCHEMA.bytea8
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dense_residual_norm_final(
state MADLIB_SCHEMA.bytea8)
RETURNS MADLIB_SCHEMA.residual_norm_result
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
/**
* @brief Compute the residual after solving the dense linear systems
*
* @param left_hand_side Column containing the left hand side of the system
* @param right_hand_side Column containing the right hand side of the system
* @param solution Solution of the linear system
*
*
* @return residual_norm FLOAT8:
*
* @usage
* - Get all the diagnostic statistics:\n
*
* <pre> SELECT dense_residual_norm(<em>row_id</em>,
* <em>left_hand_side</em>,
* <em> right_hand_side </em>,
* <em> solution </em>)
* FROM <em>dataTable</em>;
* </pre>
*/
DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.dense_residual_norm(
/*+ "left_hand_side" */ DOUBLE PRECISION[],
/*+ "right_hand_side" */ DOUBLE PRECISION,
/*+ "solution" */ DOUBLE PRECISION[]
);
CREATE AGGREGATE MADLIB_SCHEMA.dense_residual_norm(
/*+ "left_hand_side" */ DOUBLE PRECISION[],
/*+ "right_hand_side" */ DOUBLE PRECISION,
/*+ "solution" */ DOUBLE PRECISION[])(
STYPE=MADLIB_SCHEMA.bytea8,
SFUNC=MADLIB_SCHEMA.dense_residual_norm_transition,
m4_ifdef(`__POSTGRESQL__', `', `PREFUNC=MADLIB_SCHEMA.dense_residual_norm_merge_states,')
FINALFUNC=MADLIB_SCHEMA.dense_residual_norm_final,
INITCOND=''
);
------------------ Direct Method ------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dense_direct_linear_system_transition(
state DOUBLE PRECISION[],
row_id INTEGER,
a DOUBLE PRECISION[],
b DOUBLE PRECISION,
num_rows INTEGER,
algorithm INTEGER)
RETURNS DOUBLE PRECISION[]
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dense_direct_linear_system_merge_states(
state1 DOUBLE PRECISION[],
state2 DOUBLE PRECISION[])
RETURNS DOUBLE PRECISION[]
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dense_direct_linear_system_final(
state DOUBLE PRECISION[])
RETURNS MADLIB_SCHEMA.dense_linear_solver_result
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
/**
* @brief Solve a system of linear equations using the direct method
*
* @param row_id Column containing the row_id
* @param left_hand_side Column containing the left hand side of the system
* @param right_hand_side Column containing the right hand side of the system
* @param numEquations Number of equations
* @param algorithm Algorithm used for the dense linear solver
*
*
* @return A composite value:
* - <tt>solution FLOAT8[] </tt> - Array of marginal effects
* - <tt>residual_norm FLOAT8</tt> - Norm of the residual
* - <tt>iters INTEGER</tt> - Iterations taken
*
* @usage
* - Get all the diagnostic statistics:\n
*
* <pre> SELECT linear_system_dense(<em>row_id</em>,
* <em>left_hand_side</em>,
* <em> right_hand_side </em>,
* <em> numEquations </em>)
* FROM <em>dataTable</em>;
* </pre>
*/
DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.dense_direct_linear_system(
/*+ "row_id" */ INTEGER,
/*+ "left_hand_side" */ DOUBLE PRECISION[],
/*+ "right_hand_side" */ DOUBLE PRECISION,
/*+ "numEquations" */ INTEGER,
/*+ "algorithm" */ INTEGER
);
CREATE AGGREGATE MADLIB_SCHEMA.dense_direct_linear_system(
/*+ "row_id" */ INTEGER,
/*+ "left_hand_side" */ DOUBLE PRECISION[],
/*+ "right_hand_side" */ DOUBLE PRECISION,
/*+ "numEquations" */ INTEGER,
/*+ "algorithm" */ INTEGER)(
STYPE=DOUBLE PRECISION[],
SFUNC=MADLIB_SCHEMA.dense_direct_linear_system_transition,
m4_ifdef(`__POSTGRESQL__', `', `PREFUNC=MADLIB_SCHEMA.dense_direct_linear_system_merge_states,')
FINALFUNC=MADLIB_SCHEMA.dense_direct_linear_system_final,
INITCOND='{0,0,0,0,0,0}'
);
--------------------------- Interface ----------------------------------
/**
* @brief Help function, to print out the supported families
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_solver_dense(
input_string VARCHAR
)
RETURNS VARCHAR AS $$
PythonFunction(linear_systems, dense_linear_systems, linear_solver_dense_help)
$$ LANGUAGE plpythonu
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_solver_dense()
RETURNS VARCHAR AS $$
BEGIN
RETURN MADLIB_SCHEMA.linear_solver_dense('');
END;
$$ LANGUAGE plpgsql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
/**
@brief A wrapper function for the various marginal linear_systemsion analyzes.
*
* @param source_table String identifying the input table
* @param out_table String identifying the output table to be created
* @param row_id Column containing the row_id
* @param left_hand_side Column containing the left hand side of the system
* @param right_hand_side Column containing the right hand side of the system
* @param grouping_cols Columns to group by
* @param optimizer Optimizer to be used
* @param optimizer_options Optimal parameters for the algorithms
*
*
* @return void
*
* @usage
* For function summary information. Run
* sql> select linear_solver_dense('help');
* OR
* sql> select linear_solver_dense();
* OR
* sql> select linear_solver_dense('?');
* For function usage information. Run
* sql> select linear_solver_dense('usage');
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_solver_dense(
source_table VARCHAR -- name of input table
, out_table VARCHAR -- name of output table
, row_id VARCHAR -- name of the column containing row_id
, left_hand_side VARCHAR -- name of columns with lhs
, right_hand_side VARCHAR -- name of columns with rhs
, grouping_cols VARCHAR -- name of columns to group by
, optimizer VARCHAR -- Name of the optimizer
, optimizer_options VARCHAR -- Optimal parameters of the optimizer
)
RETURNS VOID AS $$
PythonFunction(linear_systems, dense_linear_systems, linear_solver_dense)
$$ LANGUAGE plpythonu
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-- Default Variable calls for linear_solver_dense
------------------------------------------------------------------------------
/**
* @brief Marginal effects with default variables
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_solver_dense(
source_table VARCHAR -- name of input table
, out_table VARCHAR -- name of output table
, row_id VARCHAR -- name of the column containing row_id
, left_hand_side VARCHAR -- name of columns with lhs
, right_hand_side VARCHAR -- name of columns with rhs
)
RETURNS VOID AS $$
BEGIN
PERFORM MADLIB_SCHEMA.linear_solver_dense(
source_table,
out_table,
row_id,
left_hand_side,
right_hand_side,
NULL,
'direct',
NULL);
END;
$$ LANGUAGE plpgsql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
/**
* @brief Marginal effects with default variables
**/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_solver_dense(
source_table VARCHAR -- name of input table
, out_table VARCHAR -- name of output table
, row_id VARCHAR -- name of the column containing row_id
, left_hand_side VARCHAR -- name of columns with lhs
, right_hand_side VARCHAR -- name of columns with rhs
, grouping_cols VARCHAR -- name of columns to group by
)
RETURNS VOID AS $$
BEGIN
PERFORM MADLIB_SCHEMA.linear_solver_dense(
source_table,
out_table,
row_id,
left_hand_side,
right_hand_side,
grouping_cols,
'direct',
NULL);
END;
$$ LANGUAGE plpgsql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
/**
* @brief Marginal effects with default variables
**/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_solver_dense(
source_table VARCHAR -- name of input table
, out_table VARCHAR -- name of output table
, row_id VARCHAR -- name of the column containing row_id
, left_hand_side VARCHAR -- name of columns with lhs
, right_hand_side VARCHAR -- name of columns with rhs
, grouping_cols VARCHAR -- name of columns to group by
, optimizer VARCHAR -- Name of the optimizer
)
RETURNS VOID AS $$
BEGIN
PERFORM MADLIB_SCHEMA.linear_solver_dense(
source_table,
out_table,
row_id,
left_hand_side,
right_hand_side,
grouping_cols,
optimizer,
NULL);
END;
$$ LANGUAGE plpgsql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');