| /* ----------------------------------------------------------------------- *//** |
| * |
| * @file svec_util.sql_in |
| * |
| * @brief SQL utility functions for sparse vector data type |
| * <tt>svec</tt> |
| * |
| * @sa For an introduction to the sparse vector implementation, see the module |
| * description \ref grp_svec. |
| * |
| *//* ----------------------------------------------------------------------- */ |
| |
| m4_include(`SQLCommon.m4') |
| |
| --! Creates sparse vector representation given an array of indexes, respective values and |
| --! size of the required vector. The function just does the bucket filling with the values |
| --! at the respective indexes for the vector given the length. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gen_svec(BIGINT[], FLOAT8[], BIGINT) |
| RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'generate_sparse_vector' |
| LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL'); |
| |
| --! Creates the output table containing the sparse vector representation for the documents |
| --! given the dictionary table, documents tables and names of the respective columns. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.gen_doc_svecs( |
| output_tbl TEXT, |
| dictionary_tbl TEXT, |
| dict_id_col TEXT, |
| dict_term_col TEXT, |
| documents_tbl TEXT, |
| doc_id_col TEXT, |
| doc_term_col TEXT, |
| doc_term_info_col TEXT |
| ) RETURNS TEXT AS $$ |
| PythonFunction(svec_util, generate_svec, generate_doc_svecs) |
| $$ LANGUAGE plpythonu VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA'); |
| |
| --! Helper function for MADLIB_SCHEMA.gen_doc_svec UDF. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.gen_doc_svecs() |
| RETURNS TEXT AS $$ |
| PythonFunction(svec_util, generate_svec, generate_doc_svecs_help) |
| $$ LANGUAGE plpythonu IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL'); |
| |
| --! Basic floating point scalar operator: MIN. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_dmin(float8,float8) RETURNS float8 AS 'MODULE_PATHNAME', 'float8_min' LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Basic floating point scalar operator: MAX. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_dmax(float8,float8) RETURNS float8 AS 'MODULE_PATHNAME', 'float8_max' LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Counts the number of non-zero entries in the input vector; the second argument is capped at 1, then added to the first; used as the sfunc in the svec_count_nonzero() aggregate below. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_count(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec |
| AS 'MODULE_PATHNAME', 'svec_count' STRICT LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Computes the logarithm of each element of the input SVEC. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_log(MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_log' STRICT LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Returns true if two SVECs are equal, not counting zeros (zero equals anything). If the two SVECs are of different size, then the function essentially zero-pads the shorter one and performs the comparison. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_eq_non_zero(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS boolean AS 'MODULE_PATHNAME', 'svec_eq_non_zero' STRICT LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Returns true if left svec contains right one, meaning that every non-zero value in the right svec equals left one |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_contains(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS boolean AS 'MODULE_PATHNAME', 'svec_contains' STRICT LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Computes the l2norm of an SVEC. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2norm(MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_l2norm' STRICT LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Computes the l2norm of a float8 array. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2norm(float8[]) RETURNS float8 AS 'MODULE_PATHNAME', 'float8arr_l2norm' LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Computes the l2norm distance between two SVECs. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.l2norm(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) |
| RETURNS float8 AS 'MODULE_PATHNAME', 'svec_svec_l2norm' LANGUAGE C STRICT IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Computes the l1norm distance between two SVECs. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.l1norm(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) |
| RETURNS float8 AS 'MODULE_PATHNAME', 'svec_svec_l1norm' LANGUAGE C STRICT IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Computes the l1norm of an SVEC. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l1norm(MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_l1norm' STRICT LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Computes the l1norm of a float8 array. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l1norm(float8[]) RETURNS float8 AS 'MODULE_PATHNAME', 'float8arr_l1norm' STRICT LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Computes the angle between two SVECs in radians. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.angle(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) |
| RETURNS float8 AS 'MODULE_PATHNAME', 'svec_svec_angle' LANGUAGE C STRICT IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Computes the Tanimoto distance between two SVECs. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.tanimoto_distance(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) |
| RETURNS float8 AS 'MODULE_PATHNAME', 'svec_svec_tanimoto_distance' LANGUAGE C STRICT IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Unnests an SVEC into a table of uncompressed values |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_unnest(MADLIB_SCHEMA.svec) RETURNS setof float8 AS 'MODULE_PATHNAME', 'svec_unnest' LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Appends an element to the back of an SVEC. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_pivot(MADLIB_SCHEMA.svec,float8) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_pivot' LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Sums the elements of an SVEC. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_elsum(MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_summate' STRICT LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Sums the elements of a float8 array. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_elsum(float8[]) RETURNS float8 AS 'MODULE_PATHNAME', 'float8arr_summate' STRICT LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Computes the median element of a float8 array. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_median(float8[]) RETURNS float8 AS 'MODULE_PATHNAME', 'float8arr_median' STRICT LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Computes the median element of an SVEC. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_median(MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_median' STRICT LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Compares an SVEC to a float8, and returns positions of all elements not equal to the float as an array. Element index here starts at 0. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_nonbase_positions(MADLIB_SCHEMA.svec, FLOAT8) RETURNS INT8[] AS 'MODULE_PATHNAME', 'svec_nonbase_positions' STRICT LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Compares an SVEC to a float8, and returns values of all elements not equal to the float as an array. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_nonbase_values(MADLIB_SCHEMA.svec, FLOAT8) RETURNS FLOAT8[] AS 'MODULE_PATHNAME', 'svec_nonbase_values' STRICT LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| |
| --! Returns the dimension of an SVEC. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_dimension(MADLIB_SCHEMA.svec) RETURNS integer AS 'MODULE_PATHNAME', 'svec_dimension' LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Applies a given function to each element of an SVEC. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_lapply(text,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_lapply' LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Appends a run-length block to the back of an SVEC. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_append(MADLIB_SCHEMA.svec,float8,int8) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_append' LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Projects onto an element of an SVEC. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_proj(MADLIB_SCHEMA.svec,int4) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_proj' LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Extracts a subvector of an SVEC given the subvector's start and end indices. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_subvec(MADLIB_SCHEMA.svec,int4,int4) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_subvec' LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Reverses the elements of an SVEC. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_reverse(MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_reverse' LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Replaces the subvector of a given SVEC at a given start index with another SVEC. Note that element index should start at 1. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_change(MADLIB_SCHEMA.svec,int4,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_change' LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Computes the hash of an SVEC. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_hash(MADLIB_SCHEMA.svec) RETURNS int4 AS 'MODULE_PATHNAME', 'svec_hash' STRICT LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Computes the word-occurence vector of a document |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_sfv(text[], text[]) RETURNS MADLIB_SCHEMA.svec AS |
| 'MODULE_PATHNAME', 'gp_extract_feature_histogram' LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Sorts an array of texts. This function should be in MADlib common. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_sort(text[]) RETURNS text[] AS $$ |
| SELECT array(SELECT unnest($1::text[]) ORDER BY 1); |
| $$ LANGUAGE SQL |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `READS SQL DATA', `'); |
| |
| --! Converts an svec to a text string |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_to_string(MADLIB_SCHEMA.svec) |
| RETURNS text AS 'MODULE_PATHNAME', 'svec_to_string' STRICT LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Converts a text string to an svec |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_from_string(text) |
| RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_from_string' STRICT LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| |
| --! Transition function for mean(svec) aggregate |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_mean_transition( FLOAT[], MADLIB_SCHEMA.svec) |
| RETURNS FLOAT[] AS 'MODULE_PATHNAME' |
| LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Preliminary merge function for mean(svec) aggregate |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_mean_prefunc( FLOAT[], FLOAT[]) |
| RETURNS FLOAT[] AS 'MODULE_PATHNAME' |
| LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Final function for mean(svec) aggregate |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_mean_final( FLOAT[]) |
| RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME' |
| LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| --! Aggregate that computes the element-wise mean of a list of vectors. |
| --! |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.mean( MADLIB_SCHEMA.svec) CASCADE; |
| CREATE AGGREGATE MADLIB_SCHEMA.mean( MADLIB_SCHEMA.svec) ( |
| SFUNC = MADLIB_SCHEMA.svec_mean_transition, |
| m4_ifdef(`__POSTGRESQL__', `', `prefunc = MADLIB_SCHEMA.svec_mean_prefunc,') |
| FINALFUNC = MADLIB_SCHEMA.svec_mean_final, |
| STYPE = FLOAT[] |
| ); |
| |
| --! Aggregate that provides the element-wise sum of a list of vectors. |
| --! |
| -- DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_sum(MADLIB_SCHEMA.svec); |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_sum (MADLIB_SCHEMA.svec) CASCADE; |
| CREATE AGGREGATE MADLIB_SCHEMA.svec_sum (MADLIB_SCHEMA.svec) ( |
| SFUNC = MADLIB_SCHEMA.svec_plus, |
| m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.svec_plus,') |
| INITCOND = '{1}:{0.}', -- Zero |
| STYPE = MADLIB_SCHEMA.svec |
| ); |
| |
| --! Aggregate that provides a tally of nonzero entries in a list of vectors. |
| --! |
| -- DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_count_nonzero(MADLIB_SCHEMA.svec); |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_count_nonzero (MADLIB_SCHEMA.svec) CASCADE; |
| CREATE AGGREGATE MADLIB_SCHEMA.svec_count_nonzero (MADLIB_SCHEMA.svec) ( |
| SFUNC = MADLIB_SCHEMA.svec_count, |
| m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.svec_plus,') |
| INITCOND = '{1}:{0.}', -- Zero |
| STYPE = MADLIB_SCHEMA.svec |
| ); |
| |
| --! Aggregate that turns a list of float8 values into an SVEC. |
| --! |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_agg (float8) CASCADE; |
| CREATE m4_ifdef(`__POSTGRESQL__', `', `ORDERED') |
| AGGREGATE MADLIB_SCHEMA.svec_agg (float8) ( |
| SFUNC = MADLIB_SCHEMA.svec_pivot, |
| STYPE = MADLIB_SCHEMA.svec |
| ); |
| |
| --! Aggregate that computes the median element of a list of float8 values. |
| --! |
| -- DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_median_inmemory(float8); |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svec_median_inmemory (float8) CASCADE; |
| CREATE AGGREGATE MADLIB_SCHEMA.svec_median_inmemory (float8) ( |
| SFUNC = MADLIB_SCHEMA.svec_pivot, |
| m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.svec_concat,') |
| FINALFUNC = MADLIB_SCHEMA.svec_median, |
| STYPE = MADLIB_SCHEMA.svec |
| ); |
| |
| --! Normalizes an SVEC that is divides all elements by its norm/magnitude. |
| --! |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.normalize(MADLIB_SCHEMA.svec) |
| RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_normalize' LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |