| /* ----------------------------------------------------------------------- *//** |
| * |
| * @file linalg.sql_in |
| * |
| * @brief SQL functions for linear algebra |
| * |
| * @sa For an overview of linear-algebra functions, see the module |
| * description \ref grp_linalg. |
| * |
| *//* ----------------------------------------------------------------------- */ |
| |
| m4_include(`SQLCommon.m4') |
| m4_changequote(`<!', `!>') |
| |
| /** |
| @addtogroup grp_linalg |
| |
| <div class="toc"><b>Contents</b> |
| <ul> |
| <li class="level1"><a href="#functions">Linear Algebra Utility Functions</a></li> |
| <li class="level1"><a href="#examples">Examples</a></li> |
| <li class="level1"><a href="#literature">Literature</a></li> |
| <li class="level1"><a href="#related">Related Functions</a></li> |
| </ul> |
| </div> |
| |
| @brief Provides utility functions for basic linear algebra operations. |
| |
| The linalg module consists of useful utility functions for basic linear |
| algebra operations. Several of these functions can be used |
| while implementing new algorithms. These functions operate on vectors |
| (1-D FLOAT8 array) and matrices (2-D FLOAT8 array). Note that other array |
| types may need to be cast to FLOAT8[] before calling the functions. |
| |
| Refer to the linalg.sql_in file for documentation on each of the utility functions. |
| |
| @anchor functions |
| @par Linear Algebra Utility Functions |
| <table class="output"> |
| |
| <tr><th>norm1()</th> |
| <td>1-norm of a vector, \f$\|\vec{a}\|_1\f$.</td></tr> |
| |
| <tr><th>norm2()</th> |
| <td>2-norm of a vector, \f$\|\vec{a}\|_2\f$. </td></tr> |
| |
| <tr><th>dist_norm1()</th> |
| <td>1-norm of the difference between two vectors, \f$\|\vec{a} - \vec{b}\|_1\f$. </td></tr> |
| |
| <tr><th>dist_norm2()</th> |
| <td>2-norm of the difference between two vectors, \f$\|\vec{a} - \vec{b}\|_2\f$. </td></tr> |
| |
| <tr><th>dist_pnorm()</th> |
| <td>Generic p-norm of the difference between two vectors, \f$\|\vec{a} - \vec{b}\|_p, p > 0\f$. </td></tr> |
| |
| <tr><th>dist_inf_norm()</th> |
| <td>Infinity-norm of the difference between two vectors, \f$\|\vec{a} - \vec{b}\|_\infty\f$. </td></tr> |
| |
| <tr><th>squared_dist_norm2()</th> |
| <td>Squared 2-norm of the difference between two vectors, \f$\|\vec{a} - \vec{b}\|_2^2\f$. </td></tr> |
| |
| <tr><th>cosine_similarity()</th> |
| <td>Cosine score between two vectors, \f$\frac{\vec{a} \cdot \vec{b}}{\|\vec{a}\|_2 \|\vec{b}\|_2}\f$. </td></tr> |
| |
| <tr><th>dist_angle()</th> |
| <td>Angle between two vectors in a Euclidean space, \f$\cos^{-1}(\frac{\vec{a} \cdot \vec{b}}{\|\vec{a}\|_2 \|\vec{b}\|_2})\f$. </td></tr> |
| |
| <tr><th>dist_tanimoto()</th> |
| <td>Tanimoto distance between two vectors. [1] </td></tr> |
| |
| <tr><th>dist_jaccard()</th> |
| <td>Jaccard distance between two varchar vectors treated as sets. </td></tr> |
| |
| <tr><th>get_row()</th> |
| <td>Return the indexed row of a matrix (2-D array). </td></tr> |
| |
| <tr><th>get_col()</th> |
| <td>Return the indexed col of a matrix (2-D array). </td></tr> |
| |
| <tr><th>avg()</th> |
| <td>Compute the average of vectors. </td></tr> |
| |
| <tr><th>normalized_avg()</th> |
| <td>Compute the normalized average of vectors (unit vector in a Euclidean space). </td></tr> |
| |
| <tr><th>matrix_agg()</th> |
| <td>Combine vectors to a matrix. </td></tr> |
| |
| </table> |
| |
| @anchor examples |
| @examp |
| |
| <b>Vector Norms and Distances</b> |
| |
| -# Create a database table with two vector columns and add some data. |
| <pre class="example"> |
| CREATE TABLE two_vectors( |
| id integer, |
| a float8[], |
| b float8[]); |
| </pre> |
| <pre class="example"> |
| INSERT INTO two_vectors VALUES |
| (1, '{3,4}', '{4,5}'), |
| (2, '{1,1,0,-4,5,3,4,106,14}', '{1,1,0,6,-3,1,2,92,2}'); |
| </pre> |
| -# Invoke norm functions. |
| <pre class="example"> |
| SELECT |
| id, |
| madlib.norm1(a), |
| madlib.norm2(a) |
| FROM two_vectors; |
| </pre> |
| Result: |
| <pre class="result"> |
| id | norm1 | norm2 |
| ----+-------+------------------ |
| 1 | 7 | 5 |
| 2 | 138 | 107.238052947636 |
| (2 rows) |
| </pre> |
| -# Invoke distance functions. |
| <pre class="example"> |
| SELECT |
| id, |
| madlib.dist_norm1(a, b), |
| madlib.dist_norm2(a, b), |
| madlib.dist_pnorm(a, b, 5) AS norm5, |
| madlib.dist_inf_norm(a, b), |
| madlib.squared_dist_norm2(a, b) AS sq_dist_norm2, |
| madlib.cosine_similarity(a, b), |
| madlib.dist_angle(a, b), |
| madlib.dist_tanimoto(a, b), |
| madlib.dist_jaccard(a::text[], b::text[]) |
| FROM two_vectors; |
| </pre> |
| Result: |
| <pre class="result"> |
| id | dist_norm1 | dist_norm2 | norm5 | dist_inf_norm | sq_dist_norm2 | cosine_similarity | dist_angle | dist_tanimoto | dist_jaccard |
| ----+------------+------------------+------------------+---------------+---------------+-------------------+--------------------+--------------------+------------------- |
| 1 | 2 | 1.4142135623731 | 1.14869835499704 | 1 | 2 | 0.999512076087079 | 0.0312398334302684 | 0.0588235294117647 | 0.666666666666667 |
| 2 | 48 | 22.6274169979695 | 15.585086360695 | 14 | 512 | 0.985403348449008 | 0.17106899659286 | 0.0498733684005455 | 0.833333333333333 |
| (2 rows) |
| </pre> |
| |
| <b>Matrix Functions</b> |
| |
| -# Create a database table with a matrix column. |
| <pre class="example"> |
| CREATE TABLE matrix( |
| id integer, |
| m float8[]); |
| </pre> |
| <pre class="example"> |
| INSERT INTO matrix VALUES |
| (1, '{{4,5},{3,5},{9,0}}'); |
| </pre> |
| -# Invoke matrix functions. |
| <pre class="example"> |
| SELECT |
| madlib.get_row(m, 1) AS row_1, |
| madlib.get_row(m, 2) AS row_2, |
| madlib.get_row(m, 3) AS row_3, |
| madlib.get_col(m, 1) AS col_1, |
| madlib.get_col(m, 2) AS col_2 |
| FROM matrix; |
| </pre> |
| Result: |
| <pre class="result"> |
| row_1 | row_2 | row_3 | col_1 | col_2 |
| -------+-------+-------+---------+--------- |
| {4,5} | {3,5} | {9,0} | {4,3,9} | {5,5,0} |
| (1 row) |
| </pre> |
| |
| <b>Aggregate Functions</b> |
| |
| -# Create a database table with a vector column. |
| <pre class="example"> |
| CREATE TABLE vector( |
| id integer, |
| v float8[]); |
| </pre> |
| <pre class="example"> |
| INSERT INTO vector VALUES |
| (1, '{4,3}'), |
| (2, '{8,6}'), |
| (3, '{12,9}'); |
| </pre> |
| -# Invoke aggregate functions. |
| <pre class="example"> |
| SELECT |
| madlib.avg(v), |
| madlib.normalized_avg(v), |
| madlib.matrix_agg(v) |
| FROM vector; |
| </pre> |
| Result: |
| <pre class="result"> |
| avg | normalized_avg | matrix_agg |
| -------+----------------+---------------------- |
| {8,6} | {0.8,0.6} | {{4,3},{8,6},{12,9}} |
| (1 row) |
| </pre> |
| |
| @anchor literature |
| @literature |
| |
| [1] http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto_similarity_and_distance |
| |
| @anchor related |
| @par Related Topics |
| File linalg.sql_in documenting the SQL functions. |
| */ |
| |
| /** |
| * @brief 1-norm of a vector |
| * |
| * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$ |
| * @return \f$ \| x \|_1 = \sum_{i=1}^n |x_i| \f$ |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.norm1( |
| x DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C |
| IMMUTABLE |
| STRICT |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| /** |
| * @brief 2-norm of a vector |
| * |
| * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$ |
| * @return \f$ \| x \|_2 = \sqrt{\sum_{i=1}^n x_i^2} \f$ |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.norm2( |
| x DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C |
| IMMUTABLE |
| STRICT |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| /** |
| * @brief Infinity-norm of the difference between two vectors |
| * |
| * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$ |
| * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$ |
| * @return \f$ \| x - y \|_\infty = \max_{i=1}^n |x_i - y_i| \f$ |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dist_inf_norm( |
| x DOUBLE PRECISION[], |
| y DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C |
| IMMUTABLE |
| STRICT |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| /** |
| * @brief p-norm of the difference between two vectors |
| * |
| * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$ |
| * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$ |
| * @param p Scalar \f$ p > 0 \f$ |
| * @return \f$ \| x - y \|_p = (\sum_{i=1}^n |x_i - y_i|^p)^{\frac{1}{p}} \f$ |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dist_pnorm( |
| x DOUBLE PRECISION[], |
| y DOUBLE PRECISION[], |
| p DOUBLE PRECISION |
| ) RETURNS DOUBLE PRECISION |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C |
| IMMUTABLE |
| STRICT |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| /** |
| * @brief 1-norm of the difference between two vectors |
| * |
| * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$ |
| * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$ |
| * @return \f$ \| x - y \|_1 = \sum_{i=1}^n |x_i - y_i| \f$ |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dist_norm1( |
| x DOUBLE PRECISION[], |
| y DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C |
| IMMUTABLE |
| STRICT |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| /** |
| * @brief 2-norm of the difference between two vectors |
| * |
| * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$ |
| * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$ |
| * @return \f$ \| x - y \|_2 = \sqrt{\sum_{i=1}^n (x_i - y_i)^2} \f$ |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dist_norm2( |
| x DOUBLE PRECISION[], |
| y DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C |
| IMMUTABLE |
| STRICT |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| |
| /** |
| * @brief Cosine similarity score between two vectors |
| * |
| * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$ |
| * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$ |
| * @return \f$ \frac{\langle \vec x, \vec y \rangle} |
| * {\| \vec x \| \cdot \| \vec y \|} \f$ |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cosine_similarity( |
| x DOUBLE PRECISION[], |
| y DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C |
| IMMUTABLE |
| STRICT |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| |
| |
| /** |
| * @brief Squared 2-norm of the difference between two vectors |
| * |
| * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$ |
| * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$ |
| * @return \f$ \| x - y \|_2^2 = \sum_{i=1}^n (x_i - y_i)^2 \f$ |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.squared_dist_norm2( |
| x DOUBLE PRECISION[], |
| y DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C |
| IMMUTABLE |
| STRICT |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| /** |
| * @brief Angle between two vectors |
| * |
| * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$ |
| * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$ |
| * @return \f$ \arccos\left(\frac{\langle \vec x, \vec y \rangle} |
| * {\| \vec x \| \cdot \| \vec y \|}\right) \f$ |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dist_angle( |
| x DOUBLE PRECISION[], |
| y DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C |
| IMMUTABLE |
| STRICT |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| /** |
| * @brief Tanimoto distance between two vectors |
| * |
| * @param x Vector \f$ \vec x = (x_1, \dots, x_n) \f$ |
| * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$ |
| * @return \f$ 1 - \frac{\langle \vec x, \vec y \rangle} |
| * {\| \vec x \|^2 + \| \vec y \|^2 |
| * - \langle \vec x, \vec y \rangle} \f$ |
| * |
| * For example, with \f$ \vec x = (3, 4) \f$ and \f$ \vec y = (4, 5) \f$ the |
| * result is \f$ 1 - \frac{32}{25 + 41 - 32} \approx 0.0588 \f$, which is the |
| * value shown in the module example. |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dist_tanimoto( |
| x DOUBLE PRECISION[], |
| y DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C |
| IMMUTABLE |
| STRICT |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| |
| /** |
| * @brief Jaccard distance between two vectors (treated as sets) |
| * |
| * @param x Vector \f$ \vec x = (x_1, \dots, x_m) \f$ of text elements |
| * @param y Vector \f$ \vec y = (y_1, \dots, y_n) \f$ of text elements |
| * @return \f$ 1 - \frac{|x \cap y|}{|x \cup y|} \f$ |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dist_jaccard( |
| x TEXT[], |
| y TEXT[] |
| ) RETURNS DOUBLE PRECISION |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C |
| IMMUTABLE |
| STRICT |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| /* |
| * @brief closest_column return type |
| * |
| * Composite value returned by closest_column() and _closest_column(). |
| * The type is dropped and recreated; CASCADE also removes objects that |
| * depend on a previous definition of the type. |
| */ |
| DROP TYPE IF EXISTS MADLIB_SCHEMA.closest_column_result CASCADE; |
| CREATE TYPE MADLIB_SCHEMA.closest_column_result AS ( |
| -- 0-based index of the closest column (see the closest_column documentation) |
| column_id INTEGER, |
| -- distance between that column and the query vector |
| distance DOUBLE PRECISION |
| ); |
| |
| /* |
| * Internal C entry point behind closest_column(). |
| * |
| * Takes the same matrix, vector and metric as closest_column(), plus dist_dn, |
| * which the SQL wrapper fills with the text form of dist. |
| * Declared STRICT, so a NULL argument yields NULL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA._closest_column( |
| M DOUBLE PRECISION[], |
| x DOUBLE PRECISION[], |
| dist REGPROC, |
| dist_dn TEXT |
| ) |
| RETURNS MADLIB_SCHEMA.closest_column_result |
| LANGUAGE C |
| IMMUTABLE |
| STRICT |
| AS 'MODULE_PATHNAME', 'closest_column' |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| |
| -- Because of Jira MPP-23166, ORCA makes the following |
| -- function extremely slow because "NO SQL" now becomes |
| -- "CONTAINS SQL". This is why we disabled the optimizer |
| -- in kmeans. |
| /** |
| * @brief Given matrix \f$ M \f$ and vector \f$ \vec x \f$ compute the column |
| * of \f$ M \f$ that is closest to \f$ \vec x \f$ |
| * |
| * @param M Matrix \f$ M = (\vec{m_0} \dots \vec{m_{l-1}}) |
| * \in \mathbb{R}^{k \times l} \f$ |
| * @param x Vector \f$ \vec x \in \mathbb R^k \f$ |
| * @param dist The metric \f$ \operatorname{dist} \f$. This needs to be a |
| * function with signature |
| * <tt>DOUBLE PRECISION[] x DOUBLE PRECISION[] -> DOUBLE PRECISION</tt>. |
| * |
| * @returns A composite value: |
| * - <tt>column_id INTEGER</tt> - The 0-based index of the column of \f$ M \f$ |
| * that is closest to \f$ x \f$. In case of ties, the first such index is |
| * returned. That is, \c column_id is the minimum element in the set |
| * \f$ \arg\min_{i=0,\dots,l-1} \operatorname{dist}(\vec{m_i}, \vec x) \f$. |
| * - <tt>distance DOUBLE PRECISION</tt> - The minimum distance between any |
| * column of \f$ M \f$ and \f$ x \f$. That is, |
| * \f$ \min_{i=0,\dots,l-1} \operatorname{dist}(\vec{m_i}, \vec x) \f$. |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared CONTAINS SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.closest_column( |
| M DOUBLE PRECISION[], |
| x DOUBLE PRECISION[], |
| dist REGPROC /*+ DEFAULT 'squared_dist_norm2' */ |
| ) RETURNS MADLIB_SCHEMA.closest_column_result AS $$ |
| -- The fourth argument is the text form of the metric passed as dist. |
| SELECT MADLIB_SCHEMA._closest_column($1, $2, $3, textin(regprocout($3))) |
| $$ LANGUAGE SQL IMMUTABLE STRICT |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!CONTAINS SQL!>, <!!>); |
| |
| /** |
| * @brief Two-argument overload of closest_column() that uses |
| * squared_dist_norm2() as the metric |
| * |
| * @param M Matrix whose columns are compared against \c x |
| * @param x Query vector |
| * @returns The same composite value as the three-argument closest_column() |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared CONTAINS SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.closest_column( |
| M DOUBLE PRECISION[], |
| x DOUBLE PRECISION[] |
| ) RETURNS MADLIB_SCHEMA.closest_column_result |
| IMMUTABLE |
| STRICT |
| LANGUAGE sql |
| AS $$ |
| SELECT MADLIB_SCHEMA.closest_column($1, $2, |
| 'MADLIB_SCHEMA.squared_dist_norm2') |
| $$ |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!CONTAINS SQL!>, <!!>); |
| |
| /* |
| * @brief closest_columns return type |
| * |
| * Composite value returned by closest_columns() and _closest_columns(). |
| * The type is dropped and recreated; CASCADE also removes objects that |
| * depend on a previous definition of the type. |
| */ |
| DROP TYPE IF EXISTS MADLIB_SCHEMA.closest_columns_result CASCADE; |
| CREATE TYPE MADLIB_SCHEMA.closest_columns_result AS ( |
| -- 0-based indices of the closest columns (see the closest_columns documentation) |
| column_ids INTEGER[], |
| -- distances[i] is the distance for the column with index column_ids[i] |
| distances DOUBLE PRECISION[] |
| ); |
| |
| /** |
| * @brief Given matrix \f$ M \f$ and vector \f$ \vec x \f$ compute the columns |
| * of \f$ M \f$ that are closest to \f$ \vec x \f$ |
| * |
| * This function does essentially the same as \ref closest_column(), except that |
| * it lets the caller specify the number of closest columns to return. The |
| * return value is a composite value: |
| * - <tt>column_ids INTEGER[]</tt> - The 0-based indices of the \c num columns |
| * of \f$ M \f$ that are closest to \f$ x \f$. In case of ties, the first |
| * such indices are returned. |
| * - <tt>distances DOUBLE PRECISION[]</tt> - The distances between the columns |
| * of \f$ M \f$ with indices in \c column_ids and \f$ x \f$. That is, |
| * <tt>distances[i]</tt> contains |
| * \f$ \operatorname{dist}(\vec{m_j}, \vec x) \f$, where \f$ j = \f$ |
| * <tt>column_ids[i]</tt>. |
| */ |
| |
| /* |
| * Internal C entry point behind closest_columns(). |
| * |
| * Takes the same matrix, vector, count and metric as closest_columns(), plus |
| * dist_dn, which the SQL wrapper fills with the text form of dist. |
| * Declared STRICT, so a NULL argument yields NULL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA._closest_columns( |
| M DOUBLE PRECISION[], |
| x DOUBLE PRECISION[], |
| num INTEGER, |
| dist REGPROC, |
| dist_dn TEXT |
| ) |
| RETURNS MADLIB_SCHEMA.closest_columns_result |
| LANGUAGE C |
| IMMUTABLE |
| STRICT |
| AS 'MODULE_PATHNAME', 'closest_columns' |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| /** |
| * @brief Return the \c num columns of \c M that are closest to \c x under the |
| * metric \c dist |
| * |
| * @param M Matrix whose columns are compared against \c x |
| * @param x Query vector |
| * @param num Number of closest columns to return |
| * @param dist The metric, given as a function reference |
| * @returns Composite value with the column indices and their distances |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared CONTAINS SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.closest_columns( |
| M DOUBLE PRECISION[], |
| x DOUBLE PRECISION[], |
| num INTEGER, |
| dist REGPROC |
| ) RETURNS MADLIB_SCHEMA.closest_columns_result AS $$ |
| -- The fifth argument is the text form of the metric passed as dist. |
| SELECT MADLIB_SCHEMA._closest_columns($1, $2, $3, $4, textin(regprocout($4))) |
| $$ LANGUAGE SQL IMMUTABLE STRICT |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!CONTAINS SQL!>, <!!>); |
| |
| /** |
| * @brief Three-argument overload of closest_columns() that uses |
| * squared_dist_norm2() as the metric |
| * |
| * @param M Matrix whose columns are compared against \c x |
| * @param x Query vector |
| * @param num Number of closest columns to return |
| * @returns The same composite value as the four-argument closest_columns() |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared CONTAINS SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.closest_columns( |
| M DOUBLE PRECISION[], |
| x DOUBLE PRECISION[], |
| num INTEGER |
| ) RETURNS MADLIB_SCHEMA.closest_columns_result |
| IMMUTABLE |
| STRICT |
| LANGUAGE sql |
| AS $$ |
| SELECT MADLIB_SCHEMA.closest_columns($1, $2, $3, |
| 'MADLIB_SCHEMA.squared_dist_norm2') |
| $$ |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!CONTAINS SQL!>, <!!>); |
| |
| |
| /* |
| * State transition function of the avg() vector aggregate. |
| * |
| * Declared CALLED ON NULL INPUT, so it is invoked even when an argument is |
| * NULL. When the build enables function properties, the function is also |
| * declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.avg_vector_transition( |
| state DOUBLE PRECISION[], |
| x DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION[] |
| LANGUAGE c |
| IMMUTABLE |
| CALLED ON NULL INPUT |
| AS 'MODULE_PATHNAME' |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| /* |
| * Merge function for two transition states; registered as the PREFUNC of the |
| * avg() and normalized_avg() aggregates on builds that are not PostgreSQL. |
| * |
| * Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.avg_vector_merge( |
| state_left DOUBLE PRECISION[], |
| state_right DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION[] |
| LANGUAGE c |
| IMMUTABLE |
| STRICT |
| AS 'MODULE_PATHNAME' |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| /* |
| * Final function of the avg() vector aggregate: turns the transition state |
| * into the aggregate result. |
| * |
| * Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.avg_vector_final( |
| state DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION[] |
| LANGUAGE c |
| IMMUTABLE |
| STRICT |
| AS 'MODULE_PATHNAME' |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| /** |
| * @brief Compute the average of vectors |
| * |
| * Given vectors \f$ x_1, \dots, x_n \f$, compute the average |
| * \f$ \frac 1n \sum_{i=1}^n x_i \f$. |
| * |
| * The aggregate uses avg_vector_transition() as its transition function and |
| * avg_vector_final() as its final function, starting from the state |
| * '{0,0,0}'. On builds that are not PostgreSQL, avg_vector_merge() is |
| * additionally registered as the PREFUNC. |
| * |
| * @param x Point \f$ x_i \f$ |
| * @returns Average \f$ \frac 1n \sum_{i=1}^n x_i \f$ |
| */ |
| -- The CREATE AGGREGATE below is not OR REPLACE, so drop any earlier definition. |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.avg(DOUBLE PRECISION []); |
| CREATE AGGREGATE MADLIB_SCHEMA.avg( |
| /*+ x */ DOUBLE PRECISION[] |
| ) ( |
| STYPE=DOUBLE PRECISION[], |
| SFUNC=MADLIB_SCHEMA.avg_vector_transition, |
| -- The PREFUNC option, including its trailing comma, comes from the macro below. |
| m4_ifdef(<!__POSTGRESQL__!>, <!!>, <!PREFUNC=MADLIB_SCHEMA.avg_vector_merge,!>) |
| FINALFUNC=MADLIB_SCHEMA.avg_vector_final, |
| INITCOND='{0,0,0}' |
| ); |
| |
| /* |
| * State transition function of the normalized_avg() vector aggregate. |
| * |
| * Declared CALLED ON NULL INPUT, so it is invoked even when an argument is |
| * NULL. When the build enables function properties, the function is also |
| * declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.normalized_avg_vector_transition( |
| state DOUBLE PRECISION[], |
| x DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION[] |
| LANGUAGE c |
| IMMUTABLE |
| CALLED ON NULL INPUT |
| AS 'MODULE_PATHNAME' |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| /* |
| * Final function of the normalized_avg() vector aggregate: turns the |
| * transition state into the aggregate result. |
| * |
| * Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.normalized_avg_vector_final( |
| state DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION[] |
| LANGUAGE c |
| IMMUTABLE |
| STRICT |
| AS 'MODULE_PATHNAME' |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| /** |
| * @brief Compute the normalized average of vectors |
| * |
| * Given vectors \f$ x_1, \dots, x_n \f$, define |
| * \f$ \widetilde{x} := \frac 1n \sum_{i=1}^n \frac{x_i}{\| x_i \|} \f$, and |
| * compute the normalized average |
| * \f$ \frac{\widetilde{x}}{\| \widetilde{x} \|} \f$. |
| * |
| * The aggregate uses normalized_avg_vector_transition() as its transition |
| * function and normalized_avg_vector_final() as its final function, starting |
| * from the state '{0,0,0}'. On builds that are not PostgreSQL, |
| * avg_vector_merge() is additionally registered as the PREFUNC. |
| * |
| * @param x Point \f$ x_i \f$ |
| * @returns Normalized average \f$ \frac{\widetilde{x}}{\| \widetilde{x} \|} \f$ |
| */ |
| -- The CREATE AGGREGATE below is not OR REPLACE, so drop any earlier definition. |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.normalized_avg(DOUBLE PRECISION []); |
| CREATE AGGREGATE MADLIB_SCHEMA.normalized_avg( |
| /*+ x */ DOUBLE PRECISION[] |
| ) ( |
| STYPE=DOUBLE PRECISION[], |
| SFUNC=MADLIB_SCHEMA.normalized_avg_vector_transition, |
| -- The merge function is shared with avg(); the macro also supplies the trailing comma. |
| m4_ifdef(<!__POSTGRESQL__!>, <!!>, <!PREFUNC=MADLIB_SCHEMA.avg_vector_merge,!>) |
| FINALFUNC=MADLIB_SCHEMA.normalized_avg_vector_final, |
| INITCOND='{0,0,0}' |
| ); |
| |
| /* |
| * State transition function of the matrix_agg() aggregate. |
| * |
| * Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.matrix_agg_transition( |
| state DOUBLE PRECISION[], |
| x DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION[] |
| LANGUAGE c |
| IMMUTABLE |
| STRICT |
| AS 'MODULE_PATHNAME' |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| /* |
| * Final function of the matrix_agg() aggregate: turns the transition state |
| * into the aggregate result. |
| * |
| * Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.matrix_agg_final( |
| state DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION[] |
| LANGUAGE c |
| IMMUTABLE |
| STRICT |
| AS 'MODULE_PATHNAME' |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| /** |
| * @brief Combine vectors to a matrix |
| * |
| * Given vectors \f$ \vec x_1, \dots, \vec x_n \in \mathbb R^m \f$, |
| * return matrix \f$ ( \vec x_1 \dots \vec x_n ) \in \mathbb R^{m \times n}\f$. |
| * |
| * The aggregate uses matrix_agg_transition() as its transition function and |
| * matrix_agg_final() as its final function, starting from the state |
| * '{0,0,0}'. No PREFUNC is registered. On builds that are not PostgreSQL it |
| * is created as an ORDERED AGGREGATE. |
| * |
| * NOTE(review): in the module example each input vector appears as one inner |
| * array of the returned 2-D array; confirm how that maps to "columns" here. |
| * |
| * @param x Vector \f$ x_i \f$ |
| * @returns Matrix with columns \f$ x_1, \dots, x_n \f$ |
| */ |
| -- The CREATE AGGREGATE below is not OR REPLACE, so drop any earlier definition. |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.matrix_agg(DOUBLE PRECISION []); |
| CREATE m4_ifdef(<!__POSTGRESQL__!>, <!!>, <!ORDERED !>)AGGREGATE MADLIB_SCHEMA.matrix_agg( |
| /*+ x */ DOUBLE PRECISION[] |
| ) ( |
| STYPE=DOUBLE PRECISION[], |
| SFUNC=MADLIB_SCHEMA.matrix_agg_transition, |
| FINALFUNC=MADLIB_SCHEMA.matrix_agg_final, |
| INITCOND='{0,0,0}' |
| ); |
| |
| /** |
| * @brief Return the column of a matrix |
| * |
| * @param matrix Two-dimensional matrix |
| * @param col Column of the matrix to return (0-based index) |
| * @returns The selected column as an array |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.matrix_column( |
| matrix DOUBLE PRECISION[], |
| col INTEGER |
| ) RETURNS DOUBLE PRECISION[] |
| LANGUAGE c |
| IMMUTABLE |
| STRICT |
| AS 'MODULE_PATHNAME' |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| ------------------------------ |
| /** |
| * @brief Construct an M-column, N-row table from a 2-D array |
| * |
| * @param in_array 2-D array |
| * @returns A set of records. Because the return type is the generic record |
| * type, callers have to supply a column definition list. |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.deconstruct_2d_array( |
| in_array double precision[] |
| ) RETURNS SETOF record |
| AS 'MODULE_PATHNAME', 'deconstruct_2d_array' |
| LANGUAGE C STRICT |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| ------------------------------ |
| /** |
| * @brief Construct an M-column, M-row table using the lower triangle of a |
| * 2-D array |
| * |
| * @param in_array 2-D array |
| * @returns A set of records. Because the return type is the generic record |
| * type, callers have to supply a column definition list. |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__deconstruct_lower_triangle( |
| in_array double precision[] |
| ) RETURNS SETOF record |
| AS 'MODULE_PATHNAME', 'deconstruct_lower_triangle' |
| LANGUAGE C STRICT |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| ------------------- Created for In-Memory Iteration Controller ----------- |
| /** |
| * @brief Return the input array as 1-D |
| * |
| * @param in_array 1-D or 2-D array |
| * @returns The input as a 1-D array |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.array_to_1d( |
| in_array DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION[] |
| AS 'MODULE_PATHNAME', 'array_to_1d' |
| LANGUAGE C STRICT |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| /** |
| * @brief Return a 2-D matrix whose number of rows is encoded as the first |
| * element of the input array and whose number of columns as the second |
| * |
| * @param in_array Input array with first 2 elements describing dimensions |
| * @returns The 2-D matrix |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.array_to_2d( |
| in_array DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION[] |
| AS 'MODULE_PATHNAME', 'array_to_2d' |
| LANGUAGE C STRICT |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| /** |
| * @brief Return a 1-D form of the specified row of the given 2-D array |
| * |
| * Bound to the same C symbol as get_row(). |
| * |
| * @param in_2d_array Input 2-D array |
| * @param index Index of the row to return. The get_row() example in the module |
| * documentation uses 1 for the first row. |
| * @returns The selected row as a 1-D array |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.index_2d_array( |
| in_2d_array double precision[], |
| index integer |
| ) |
| RETURNS double precision[] |
| AS 'MODULE_PATHNAME', 'get_row_from_2d_array' |
| LANGUAGE C STRICT |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| /** |
| * @brief Get an indexed row of the given matrix (2-D array) |
| * |
| * @param in_2d_array Input 2-D array |
| * @param index Index of the row to return. The module example uses 1 for the |
| * first row. |
| * @returns The selected row as a 1-D array |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.get_row( |
| in_2d_array double precision[], |
| index integer |
| ) |
| RETURNS double precision[] |
| AS 'MODULE_PATHNAME', 'get_row_from_2d_array' |
| LANGUAGE C STRICT |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| /** |
| * @brief Get an indexed col of the given matrix (2-D array) |
| * |
| * @param in_2d_array Input 2-D array |
| * @param index Index of the column to return. The module example uses 1 for |
| * the first column. |
| * @returns The selected column as a 1-D array |
| * |
| * @note Declared STRICT, so a NULL argument yields NULL. When the build enables |
| * function properties, the function is also declared NO SQL. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.get_col( |
| in_2d_array double precision[], |
| index integer |
| ) |
| RETURNS double precision[] |
| AS 'MODULE_PATHNAME', 'get_col_from_2d_array' |
| LANGUAGE C STRICT |
| m4_ifdef(<!__HAS_FUNCTION_PROPERTIES__!>, <!NO SQL!>, <!!>); |
| |
| m4_changequote(<!`!>, <!'!>) |