blob: 3c74451c80c57f0ab7ecc1322d25bb12f75d9ec8 [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* @file linalg.sql_in
*
* @brief SQL functions for linear algebra
*
* @sa For an overview of linear-algebra functions, see the module
* description \ref grp_linalg.
*
*//* ----------------------------------------------------------------------- */
m4_include(`SQLCommon.m4')
m4_changequote(`<!', `!>')
/**
@addtogroup grp_linalg
<div class="toc"><b>Contents</b>
<ul>
<li class="level1"><a href="#functions">Linear Algebra Utility Functions</a></li>
<li class="level1"><a href="#examples">Examples</a></li>
<li class="level1"><a href="#literature">Literature</a></li>
<li class="level1"><a href="#related">Related Functions</a></li>
</ul>
</div>
@brief Provides utility functions for basic linear algebra operations.
The linalg module consists of useful utility functions for basic linear
algebra operations. Several of these functions can be used
while implementing new algorithms. These functions operate on vectors
(1-D FLOAT8 array) and matrices (2-D FLOAT8 array). Note that other array
types may need to be cast into FLOAT8[] before calling the functions.
Refer to the linalg.sql_in file for documentation on each of the utility functions.
@anchor functions
@par Linear Algebra Utility Functions
<table class="output">
<tr><th>norm1()</th>
<td>1-norm of a vector, \f$\|\vec{a}\|_1\f$. </td></tr>
<tr><th>norm2()</th>
<td>2-norm of a vector, \f$\|\vec{a}\|_2\f$. </td></tr>
<tr><th>dist_norm1()</th>
<td>1-norm of the difference between two vectors, \f$\|\vec{a} - \vec{b}\|_1\f$. </td></tr>
<tr><th>dist_norm2()</th>
<td>2-norm of the difference between two vectors, \f$\|\vec{a} - \vec{b}\|_2\f$. </td></tr>
<tr><th>dist_pnorm()</th>
<td>Generic p-norm of the difference between two vectors, \f$\|\vec{a} - \vec{b}\|_p, p > 0\f$. </td></tr>
<tr><th>dist_inf_norm()</th>
<td>Infinity-norm of the difference between two vectors, \f$\|\vec{a} - \vec{b}\|_\infty\f$. </td></tr>
<tr><th>squared_dist_norm2()</th>
<td>Squared 2-norm of the difference between two vectors, \f$\|\vec{a} - \vec{b}\|_2^2\f$. </td></tr>
<tr><th>cosine_similarity()</th>
<td>Cosine score between two vectors, \f$\frac{\vec{a} \cdot \vec{b}}{\|\vec{a}\|_2 \|\vec{b}\|_2}\f$. </td></tr>
<tr><th>dist_angle()</th>
<td>Angle between two vectors in a Euclidean space, \f$\cos^{-1}(\frac{\vec{a} \cdot \vec{b}}{\|\vec{a}\|_2 \|\vec{b}\|_2})\f$. </td></tr>
<tr><th>dist_tanimoto()</th>
<td>Tanimoto distance between two vectors. [1] </td></tr>
<tr><th>dist_jaccard()</th>
<td>Jaccard distance between two varchar vectors treated as sets. </td></tr>
<tr><th>get_row()</th>
<td>Return the indexed row of a matrix (2-D array). </td></tr>
<tr><th>get_col()</th>
<td>Return the indexed col of a matrix (2-D array). </td></tr>
<tr><th>avg()</th>
<td>Compute the average of vectors. </td></tr>
<tr><th>normalized_avg()</th>
<td>Compute the normalized average of vectors (unit vector in a Euclidean space). </td></tr>
<tr><th>matrix_agg()</th>
<td>Combine vectors to a matrix. </td></tr>
</table>
@anchor examples
@examp
<b>Vector Norms and Distances</b>
-# Create a database table with two vector columns and add some data.
<pre class="example">
CREATE TABLE two_vectors(
id integer,
a float8[],
b float8[]);
</pre>
<pre class="example">
INSERT INTO two_vectors VALUES
(1, '{3,4}', '{4,5}'),
(2, '{1,1,0,-4,5,3,4,106,14}', '{1,1,0,6,-3,1,2,92,2}');
</pre>
-# Invoke norm functions.
<pre class="example">
SELECT
id,
madlib.norm1(a),
madlib.norm2(a)
FROM two_vectors;
</pre>
Result:
<pre class="result">
id | norm1 | norm2
----+-------+------------------
1 | 7 | 5
2 | 138 | 107.238052947636
(2 rows)
</pre>
-# Invoke distance functions.
<pre class="example">
SELECT
id,
madlib.dist_norm1(a, b),
madlib.dist_norm2(a, b),
madlib.dist_pnorm(a, b, 5) AS norm5,
madlib.dist_inf_norm(a, b),
madlib.squared_dist_norm2(a, b) AS sq_dist_norm2,
madlib.cosine_similarity(a, b),
madlib.dist_angle(a, b),
madlib.dist_tanimoto(a, b),
madlib.dist_jaccard(a::text[], b::text[])
FROM two_vectors;
</pre>
Result:
<pre class="result">
id | dist_norm1 | dist_norm2 | norm5 | dist_inf_norm | sq_dist_norm2 | cosine_similarity | dist_angle | dist_tanimoto | dist_jaccard
----+------------+------------------+------------------+---------------+---------------+-------------------+--------------------+--------------------+-------------------
1 | 2 | 1.4142135623731 | 1.14869835499704 | 1 | 2 | 0.999512076087079 | 0.0312398334302684 | 0.0588235294117647 | 0.666666666666667
2 | 48 | 22.6274169979695 | 15.585086360695 | 14 | 512 | 0.985403348449008 | 0.17106899659286 | 0.0498733684005455 | 0.833333333333333
(2 rows)
</pre>
<b>Matrix Functions</b>
-# Create a database table with a matrix column.
<pre class="example">
CREATE TABLE matrix(
id integer,
m float8[]);
</pre>
<pre class="example">
INSERT INTO matrix VALUES
(1, '{{4,5},{3,5},{9,0}}');
</pre>
-# Invoke matrix functions.
<pre class="example">
SELECT
madlib.get_row(m, 1) AS row_1,
madlib.get_row(m, 2) AS row_2,
madlib.get_row(m, 3) AS row_3,
madlib.get_col(m, 1) AS col_1,
madlib.get_col(m, 2) AS col_2
FROM matrix;
</pre>
Result:
<pre class="result">
row_1 | row_2 | row_3 | col_1 | col_2
-------+-------+-------+---------+---------
{4,5} | {3,5} | {9,0} | {4,3,9} | {5,5,0}
(1 row)
</pre>
<b>Aggregate Functions</b>
-# Create a database table with a vector column.
<pre class="example">
CREATE TABLE vector(
id integer,
v float8[]);
</pre>
<pre class="example">
INSERT INTO vector VALUES
(1, '{4,3}'),
(2, '{8,6}'),
(3, '{12,9}');
</pre>
-# Invoke aggregate functions.
<pre class="example">
SELECT
madlib.avg(v),
madlib.normalized_avg(v),
madlib.matrix_agg(v)
FROM vector;
</pre>
Result:
<pre class="result">
avg | normalized_avg | matrix_agg
-------+----------------+----------------------
{8,6} | {0.8,0.6} | {{4,3},{8,6},{12,9}}
(1 row)
</pre>
@anchor literature
@literature
[1] http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto_similarity_and_distance
@anchor related
@par Related Topics
File linalg.sql_in documenting the SQL functions.
*/
/**
 * @brief 1-norm of a vector
 *
 * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
 * @return \f$ \| x \|_1 = \sum_{i=1}^n |x_i| \f$
 *
 * @note Declared STRICT: a NULL argument yields NULL.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.norm1(
    x DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION
AS 'MODULE_PATHNAME'
LANGUAGE C
IMMUTABLE
STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/**
 * @brief 2-norm of a vector
 *
 * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
 * @return \f$ \| x \|_2 = \sqrt{\sum_{i=1}^n x_i^2} \f$
 *
 * @note Declared STRICT: a NULL argument yields NULL.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.norm2(
    x DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION
AS 'MODULE_PATHNAME'
LANGUAGE C
IMMUTABLE
STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/**
 * @brief Infinity-norm of the difference between two vectors
 *
 * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
 * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$
 * @return \f$ \| x - y \|_\infty = \max_{i=1}^n |x_i - y_i| \f$
 *
 * @note Declared STRICT: a NULL argument yields NULL.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dist_inf_norm(
    x DOUBLE PRECISION[],
    y DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION
AS 'MODULE_PATHNAME'
LANGUAGE C
IMMUTABLE
STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/**
 * @brief p-norm of the difference between two vectors
 *
 * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
 * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$
 * @param p Scalar \f$ p > 0 \f$
 * @return \f$ \| x - y \|_p = (\sum_{i=1}^n |x_i - y_i|^p)^{\frac{1}{p}} \f$
 *
 * @note Declared STRICT: a NULL argument yields NULL.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dist_pnorm(
    x DOUBLE PRECISION[],
    y DOUBLE PRECISION[],
    p DOUBLE PRECISION
) RETURNS DOUBLE PRECISION
AS 'MODULE_PATHNAME'
LANGUAGE C
IMMUTABLE
STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/**
 * @brief 1-norm of the difference between two vectors
 *
 * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
 * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$
 * @return \f$ \| x - y \|_1 = \sum_{i=1}^n |x_i - y_i| \f$
 *
 * @note Declared STRICT: a NULL argument yields NULL.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dist_norm1(
    x DOUBLE PRECISION[],
    y DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION
AS 'MODULE_PATHNAME'
LANGUAGE C
IMMUTABLE
STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/**
 * @brief 2-norm of the difference between two vectors
 *
 * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
 * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$
 * @return \f$ \| x - y \|_2 = \sqrt{\sum_{i=1}^n (x_i - y_i)^2} \f$
 *
 * @note Declared STRICT: a NULL argument yields NULL.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dist_norm2(
    x DOUBLE PRECISION[],
    y DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION
AS 'MODULE_PATHNAME'
LANGUAGE C
IMMUTABLE
STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/**
 * @brief Cosine similarity score between two vectors
 *
 * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
 * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$
 * @return \f$ \frac{\langle \vec x, \vec y \rangle}
 *     {\| \vec x \| \cdot \| \vec y \|} \f$
 *
 * @note Declared STRICT: a NULL argument yields NULL.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cosine_similarity(
    x DOUBLE PRECISION[],
    y DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION
AS 'MODULE_PATHNAME'
LANGUAGE C
IMMUTABLE
STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/**
 * @brief Squared 2-norm of the difference between two vectors
 *
 * This function is the default metric used by the two-argument
 * closest_column() and three-argument closest_columns() wrappers.
 *
 * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
 * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$
 * @return \f$ \| x - y \|_2^2 = \sum_{i=1}^n (x_i - y_i)^2 \f$
 *
 * @note Declared STRICT: a NULL argument yields NULL.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.squared_dist_norm2(
    x DOUBLE PRECISION[],
    y DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION
AS 'MODULE_PATHNAME'
LANGUAGE C
IMMUTABLE
STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/**
 * @brief Angle between two vectors
 *
 * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
 * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$
 * @return \f$ \arccos\left(\frac{\langle \vec x, \vec y \rangle}
 *     {\| \vec x \| \cdot \| \vec y \|}\right) \f$
 *
 * @note Declared STRICT: a NULL argument yields NULL.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dist_angle(
    x DOUBLE PRECISION[],
    y DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION
AS 'MODULE_PATHNAME'
LANGUAGE C
IMMUTABLE
STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/**
 * @brief Tanimoto distance between two vectors
 *
 * The denominator is the SUM of the squared norms minus the inner product.
 * For example, with x = (3,4) and y = (4,5): inner product 32, squared norms
 * 25 and 41, giving 1 - 32 / (25 + 41 - 32) = 1/17, approximately 0.0588.
 *
 * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$
 * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$
 * @return \f$ 1 - \frac{\langle \vec x, \vec y \rangle}
 *     {\| \vec x \|^2 + \| \vec y \|^2
 *     - \langle \vec x, \vec y \rangle} \f$
 *
 * @note Declared STRICT: a NULL argument yields NULL.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dist_tanimoto(
    x DOUBLE PRECISION[],
    y DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION
AS 'MODULE_PATHNAME'
LANGUAGE C
IMMUTABLE
STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/**
 * @brief Jaccard distance between two text arrays (treated as sets)
 *
 * Unlike the other distance functions in this file, the arguments are TEXT[]
 * arrays; numeric arrays must be cast by the caller (e.g. a::text[]).
 *
 * @param x Array of elements \f$ (x_1, \dots, x_m) \f$, treated as a set
 * @param y Array of elements \f$ (y_1, \dots, y_n) \f$, treated as a set
 * @return \f$ 1 - \frac{|x \cap y|}{|x \cup y|} \f$
 *
 * @note Declared STRICT: a NULL argument yields NULL.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dist_jaccard(
    x TEXT[],
    y TEXT[]
) RETURNS DOUBLE PRECISION
AS 'MODULE_PATHNAME'
LANGUAGE C
IMMUTABLE
STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/*
 * Composite return type of closest_column() and _closest_column():
 *   column_id - index of the selected column
 *   distance  - distance between that column and the query vector
 *
 * The type is dropped WITH CASCADE before being re-created, so every object
 * depending on a previously installed version is dropped along with it.
 */
DROP TYPE IF EXISTS MADLIB_SCHEMA.closest_column_result CASCADE;
CREATE TYPE MADLIB_SCHEMA.closest_column_result AS (
    column_id   INTEGER,
    distance    DOUBLE PRECISION
);
/*
 * Internal C entry point (link symbol closest_column) used by the SQL
 * wrapper closest_column().
 *
 *   M        matrix whose columns are searched
 *   x        query vector
 *   dist     distance function, passed as REGPROC
 *   dist_dn  text form of dist; the wrapper computes it as
 *            textin(regprocout(dist))
 *
 * Declared STRICT: a NULL argument yields NULL.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA._closest_column(
    M           DOUBLE PRECISION[],
    x           DOUBLE PRECISION[],
    dist        REGPROC,
    dist_dn     TEXT
) RETURNS MADLIB_SCHEMA.closest_column_result
AS 'MODULE_PATHNAME', 'closest_column'
LANGUAGE C
IMMUTABLE
STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
-- Because of Jira MPP-23166, ORCA makes the following
-- function extremely slow because "NO SQL" now becomes
-- "CONTAINS SQL". This is why we disabled the optimizer
-- in kmeans.
/**
 * @brief Given matrix \f$ M \f$ and vector \f$ \vec x \f$ compute the column
 *     of \f$ M \f$ that is closest to \f$ \vec x \f$
 *
 * @param M Matrix \f$ M = (\vec{m_0} \dots \vec{m_{l-1}})
 *     \in \mathbb{R}^{k \times l} \f$
 * @param x Vector \f$ \vec x \in \mathbb R^k \f$
 * @param dist The metric \f$ \operatorname{dist} \f$. This needs to be a
 *     function with signature
 *     <tt>DOUBLE PRECISION[] x DOUBLE PRECISION[] -> DOUBLE PRECISION</tt>.
 *
 * @returns A composite value:
 *  - <tt>column_id INTEGER</tt> - The 0-based index of the column of \f$ M \f$
 *    that is closest to \f$ x \f$. In case of ties, the first such index is
 *    returned. That is, \c column_id is the minimum element in the set
 *    \f$ \arg\min_{i=0,\dots,l-1} \operatorname{dist}(\vec{m_i}, \vec x) \f$.
 *  - <tt>distance DOUBLE PRECISION</tt> - The minimum distance between any
 *    column of \f$ M \f$ and \f$ x \f$. That is,
 *    \f$ \min_{i=0,\dots,l-1} \operatorname{dist}(\vec{m_i}, \vec x) \f$.
 *
 * @note This is a thin SQL wrapper: it forwards its arguments to
 *     _closest_column() together with the text form of \c dist.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.closest_column(
    M DOUBLE PRECISION[],
    x DOUBLE PRECISION[],
    dist REGPROC /*+ DEFAULT 'squared_dist_norm2' */
) RETURNS MADLIB_SCHEMA.closest_column_result AS $$
    SELECT MADLIB_SCHEMA._closest_column($1, $2, $3, textin(regprocout($3)))
$$ LANGUAGE SQL IMMUTABLE STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!CONTAINS SQL!>, <!!>);
/*
 * Two-argument overload of closest_column(): delegates to the three-argument
 * form with squared_dist_norm2 as the distance function.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.closest_column(
    M DOUBLE PRECISION[],
    x DOUBLE PRECISION[]
) RETURNS MADLIB_SCHEMA.closest_column_result
IMMUTABLE
STRICT
LANGUAGE sql
AS $$
    SELECT MADLIB_SCHEMA.closest_column($1, $2,
        'MADLIB_SCHEMA.squared_dist_norm2')
$$
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!CONTAINS SQL!>, <!!>);
/*
 * Composite return type of closest_columns() and _closest_columns():
 *   column_ids - indices of the selected columns
 *   distances  - distances between those columns and the query vector
 *
 * The type is dropped WITH CASCADE before being re-created, so every object
 * depending on a previously installed version is dropped along with it.
 */
DROP TYPE IF EXISTS MADLIB_SCHEMA.closest_columns_result CASCADE;
CREATE TYPE MADLIB_SCHEMA.closest_columns_result AS (
    column_ids  INTEGER[],
    distances   DOUBLE PRECISION[]
);
/**
 * @brief Given matrix \f$ M \f$ and vector \f$ \vec x \f$ compute the columns
 *     of \f$ M \f$ that are closest to \f$ \vec x \f$
 *
 * This function does essentially the same as \ref closest_column(), except
 * that the caller specifies how many closest columns to return. The return
 * value is a composite value:
 *  - <tt>column_ids INTEGER[]</tt> - The 0-based indices of the \c num columns
 *    of \f$ M \f$ that are closest to \f$ x \f$. In case of ties, the first
 *    such indices are returned.
 *  - <tt>distances DOUBLE PRECISION[]</tt> - The distances between the columns
 *    of \f$ M \f$ with indices in \c column_ids and \f$ x \f$. That is,
 *    <tt>distances[i]</tt> contains
 *    \f$ \operatorname{dist}(\vec{m_j}, \vec x) \f$, where \f$ j = \f$
 *    <tt>column_ids[i]</tt>.
 *
 * The underscore-prefixed variant is the C entry point (link symbol
 * closest_columns); the SQL wrapper closest_columns() supplies \c dist_dn as
 * <tt>textin(regprocout(dist))</tt>.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA._closest_columns(
    M           DOUBLE PRECISION[],
    x           DOUBLE PRECISION[],
    num         INTEGER,
    dist        REGPROC,
    dist_dn     TEXT
) RETURNS MADLIB_SCHEMA.closest_columns_result
AS 'MODULE_PATHNAME', 'closest_columns'
LANGUAGE C
IMMUTABLE
STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/*
 * Four-argument closest_columns(): SQL wrapper that forwards its arguments to
 * _closest_columns() together with the text form of dist, computed as
 * textin(regprocout(dist)).
 *
 *   M     matrix whose columns are searched
 *   x     query vector
 *   num   number of closest columns to return
 *   dist  distance function, passed as REGPROC
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.closest_columns(
    M DOUBLE PRECISION[],
    x DOUBLE PRECISION[],
    num INTEGER,
    dist REGPROC
) RETURNS MADLIB_SCHEMA.closest_columns_result AS $$
    SELECT MADLIB_SCHEMA._closest_columns($1, $2, $3, $4, textin(regprocout($4)))
$$ LANGUAGE SQL IMMUTABLE STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!CONTAINS SQL!>, <!!>);
/*
 * Three-argument overload of closest_columns(): delegates to the
 * four-argument form with squared_dist_norm2 as the distance function.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.closest_columns(
    M DOUBLE PRECISION[],
    x DOUBLE PRECISION[],
    num INTEGER
) RETURNS MADLIB_SCHEMA.closest_columns_result
IMMUTABLE
STRICT
LANGUAGE sql
AS $$
    SELECT MADLIB_SCHEMA.closest_columns($1, $2, $3,
        'MADLIB_SCHEMA.squared_dist_norm2')
$$
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!CONTAINS SQL!>, <!!>);
/*
 * State transition function (SFUNC) of the avg(DOUBLE PRECISION[]) aggregate.
 *
 *   state  running aggregate state
 *   x      next input vector
 *
 * Declared CALLED ON NULL INPUT, so the C code is invoked even when an
 * argument is NULL and is responsible for handling that case itself.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.avg_vector_transition(
    state DOUBLE PRECISION[],
    x DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION[]
LANGUAGE c
IMMUTABLE
CALLED ON NULL INPUT
AS 'MODULE_PATHNAME'
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/*
 * Merge function combining two partial aggregate states. It is registered as
 * PREFUNC of both the avg and the normalized_avg aggregates on builds where
 * the PostgreSQL platform macro is not defined.
 *
 *   state_left   first partial state
 *   state_right  second partial state
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.avg_vector_merge(
    state_left DOUBLE PRECISION[],
    state_right DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION[]
LANGUAGE c
IMMUTABLE
STRICT
AS 'MODULE_PATHNAME'
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/*
 * Final function (FINALFUNC) of the avg(DOUBLE PRECISION[]) aggregate:
 * converts the accumulated state into the aggregate result.
 *
 *   state  accumulated aggregate state
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.avg_vector_final(
    state DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION[]
LANGUAGE c
IMMUTABLE
STRICT
AS 'MODULE_PATHNAME'
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/**
 * @brief Compute the average of vectors
 *
 * Given vectors \f$ x_1, \dots, x_n \f$, compute the average
 * \f$ \frac 1n \sum_{i=1}^n x_i \f$.
 *
 * The aggregate starts from the three-element state <tt>{0,0,0}</tt>; the
 * layout of the state array is defined by the C transition function. A merge
 * function is registered only on non-PostgreSQL builds.
 *
 * @param x Point \f$ x_i \f$
 * @returns Average \f$ \frac 1n \sum_{i=1}^n x_i \f$
 */
DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.avg(DOUBLE PRECISION []);
CREATE AGGREGATE MADLIB_SCHEMA.avg(
    /*+ x */ DOUBLE PRECISION[]
) (
    SFUNC = MADLIB_SCHEMA.avg_vector_transition,
    STYPE = DOUBLE PRECISION[],
    m4_ifdef(<!__POSTGRESQL__!>, <!!>, <!PREFUNC = MADLIB_SCHEMA.avg_vector_merge,!>)
    FINALFUNC = MADLIB_SCHEMA.avg_vector_final,
    INITCOND = '{0,0,0}'
);
/*
 * State transition function (SFUNC) of the normalized_avg(DOUBLE PRECISION[])
 * aggregate.
 *
 *   state  running aggregate state
 *   x      next input vector
 *
 * Declared CALLED ON NULL INPUT, so the C code is invoked even when an
 * argument is NULL and is responsible for handling that case itself.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.normalized_avg_vector_transition(
    state DOUBLE PRECISION[],
    x DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION[]
LANGUAGE c
IMMUTABLE
CALLED ON NULL INPUT
AS 'MODULE_PATHNAME'
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/*
 * Final function (FINALFUNC) of the normalized_avg(DOUBLE PRECISION[])
 * aggregate: converts the accumulated state into the aggregate result.
 *
 *   state  accumulated aggregate state
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.normalized_avg_vector_final(
    state DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION[]
LANGUAGE c
IMMUTABLE
STRICT
AS 'MODULE_PATHNAME'
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/**
 * @brief Compute the normalized average of vectors
 *
 * Given vectors \f$ x_1, \dots, x_n \f$, define
 * \f$ \widetilde{x} := \frac 1n \sum_{i=1}^n \frac{x_i}{\| x_i \|} \f$, and
 * compute the normalized average
 * \f$ \frac{\widetilde{x}}{\| \widetilde{x} \|} \f$.
 *
 * The aggregate starts from the three-element state <tt>{0,0,0}</tt>; the
 * layout of the state array is defined by the C transition function. On
 * non-PostgreSQL builds partial states are merged with the same merge
 * function that the avg aggregate uses.
 *
 * @param x Point \f$ x_i \f$
 * @returns Normalized average \f$ \frac{\widetilde{x}}{\| \widetilde{x} \|} \f$
 */
DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.normalized_avg(DOUBLE PRECISION []);
CREATE AGGREGATE MADLIB_SCHEMA.normalized_avg(
    /*+ x */ DOUBLE PRECISION[]
) (
    SFUNC = MADLIB_SCHEMA.normalized_avg_vector_transition,
    STYPE = DOUBLE PRECISION[],
    m4_ifdef(<!__POSTGRESQL__!>, <!!>, <!PREFUNC = MADLIB_SCHEMA.avg_vector_merge,!>)
    FINALFUNC = MADLIB_SCHEMA.normalized_avg_vector_final,
    INITCOND = '{0,0,0}'
);
/*
 * State transition function (SFUNC) of the matrix_agg(DOUBLE PRECISION[])
 * aggregate.
 *
 *   state  running aggregate state
 *   x      next input vector
 *
 * Declared STRICT, so per PostgreSQL aggregate semantics rows with a NULL
 * input are skipped rather than passed to the C code.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.matrix_agg_transition(
    state DOUBLE PRECISION[],
    x DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION[]
LANGUAGE c
IMMUTABLE
STRICT
AS 'MODULE_PATHNAME'
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/*
 * Final function (FINALFUNC) of the matrix_agg(DOUBLE PRECISION[]) aggregate:
 * converts the accumulated state into the resulting matrix.
 *
 *   state  accumulated aggregate state
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.matrix_agg_final(
    state DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION[]
LANGUAGE c
IMMUTABLE
STRICT
AS 'MODULE_PATHNAME'
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/**
 * @brief Combine vectors to a matrix
 *
 * Given vectors \f$ \vec x_1, \dots, \vec x_n \in \mathbb R^m \f$,
 * return matrix \f$ ( \vec x_1 \dots \vec x_n ) \in \mathbb R^{m \times n}\f$.
 *
 * On non-PostgreSQL builds the aggregate is created as an ORDERED aggregate.
 * No merge function is registered for it on any platform.
 *
 * @param x Vector \f$ x_i \f$
 * @returns Matrix with columns \f$ x_1, \dots, x_n \f$
 */
DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.matrix_agg(DOUBLE PRECISION []);
CREATE m4_ifdef(<!__POSTGRESQL__!>, <!!>, <!ORDERED !>)AGGREGATE MADLIB_SCHEMA.matrix_agg(
    /*+ x */ DOUBLE PRECISION[]
) (
    SFUNC = MADLIB_SCHEMA.matrix_agg_transition,
    STYPE = DOUBLE PRECISION[],
    FINALFUNC = MADLIB_SCHEMA.matrix_agg_final,
    INITCOND = '{0,0,0}'
);
/**
 * @brief Return the column of a matrix
 *
 * @param matrix Two-dimensional matrix
 * @param col Column of the matrix to return (0-based index)
 * @return The selected column as an array
 *
 * @note Declared STRICT: a NULL argument yields NULL.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.matrix_column(
    matrix DOUBLE PRECISION[],
    col INTEGER
) RETURNS DOUBLE PRECISION[]
LANGUAGE c
IMMUTABLE
STRICT
AS 'MODULE_PATHNAME'
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
------------------------------
/**
 * @brief Construct an M-column, N-row table from a 2-D array
 *
 * @param in_array 2-D array
 * @return Set of records (<tt>SETOF record</tt>); callers must supply a
 *     column definition list when selecting from the function
 */
-- NOTE(review): no volatility category is declared, so PostgreSQL treats this
-- function as VOLATILE; confirm whether IMMUTABLE was intended.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.deconstruct_2d_array(
    in_array double precision[]
) RETURNS SETOF record
AS 'MODULE_PATHNAME', 'deconstruct_2d_array'
LANGUAGE C STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
------------------------------
/**
 * @brief Construct an M-column, M-row table using the lower triangle of a
 *     2-D array
 *
 * @param in_array 2-D array
 * @return Set of records (<tt>SETOF record</tt>); callers must supply a
 *     column definition list when selecting from the function
 */
-- NOTE(review): no volatility category is declared, so PostgreSQL treats this
-- function as VOLATILE; confirm whether IMMUTABLE was intended.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__deconstruct_lower_triangle(
    in_array double precision[]
) RETURNS SETOF record
AS 'MODULE_PATHNAME', 'deconstruct_lower_triangle'
LANGUAGE C STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
------------------- Created for In-Memory Iteration Controller -----------
/**
 * @brief Return the input array as 1-D
 *
 * @param in_array 1-D or 2-D array
 * @return The contents of \c in_array as a 1-D array
 */
-- NOTE(review): no volatility category is declared, so PostgreSQL treats this
-- function as VOLATILE; confirm whether IMMUTABLE was intended.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.array_to_1d(
    in_array DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION[]
AS 'MODULE_PATHNAME', 'array_to_1d'
LANGUAGE C STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/**
 * @brief Return a 2-D matrix whose number of rows is given by the first
 *     element of the input array and whose number of columns is given by
 *     the second element.
 *
 * @param in_array Input array with first 2 elements describing dimensions
 * @return The 2-D matrix described by \c in_array
 */
-- NOTE(review): no volatility category is declared, so PostgreSQL treats this
-- function as VOLATILE; confirm whether IMMUTABLE was intended.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.array_to_2d(
    in_array DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION[]
AS 'MODULE_PATHNAME', 'array_to_2d'
LANGUAGE C STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/**
 * @brief Return a 1-D form of the specified row of the given 2-D array
 *
 * Bound to the same C symbol (get_row_from_2d_array) as get_row().
 *
 * @param in_2d_array Input 2-D array
 * @param index Index of the row to return. The get_row() examples in the
 *     module documentation use index 1 for the first row, i.e. 1-based.
 * @return The selected row as a 1-D array
 */
-- NOTE(review): no volatility category is declared, so PostgreSQL treats this
-- function as VOLATILE; confirm whether IMMUTABLE was intended.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.index_2d_array(
    in_2d_array double precision[],
    index integer
)
RETURNS double precision[]
AS 'MODULE_PATHNAME', 'get_row_from_2d_array'
LANGUAGE C STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/**
 * @brief Get an indexed row of the given matrix (2-D array)
 *
 * @param in_2d_array Input 2-D array
 * @param index Index of the row to return. The examples in the module
 *     documentation use index 1 for the first row, i.e. 1-based.
 * @return The selected row as a 1-D array
 */
-- NOTE(review): no volatility category is declared, so PostgreSQL treats this
-- function as VOLATILE; confirm whether IMMUTABLE was intended.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.get_row(
    in_2d_array double precision[],
    index integer
)
RETURNS double precision[]
AS 'MODULE_PATHNAME', 'get_row_from_2d_array'
LANGUAGE C STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
/**
 * @brief Get an indexed column of the given matrix (2-D array)
 *
 * @param in_2d_array Input 2-D array
 * @param index Index of the column to return. The examples in the module
 *     documentation use index 1 for the first column, i.e. 1-based.
 * @return The selected column as a 1-D array
 */
-- NOTE(review): no volatility category is declared, so PostgreSQL treats this
-- function as VOLATILE; confirm whether IMMUTABLE was intended.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.get_col(
    in_2d_array double precision[],
    index integer
)
RETURNS double precision[]
AS 'MODULE_PATHNAME', 'get_col_from_2d_array'
LANGUAGE C STRICT
m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>);
m4_changequote(<!`!>, <!'!>)