blob: 6413cb7f385105cf3aaa6e7972602b0146d61325 [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* @file gp_svec.sql_in
*
* @brief SQL type definitions and functions for sparse vector data type
* <tt>svec</tt>
*
* @sa For an introduction to the sparse vector implementation, see the module
* description \ref grp_svec.
*
*//* ----------------------------------------------------------------------- */
/**
@addtogroup grp_svec
@about
This module implements a sparse vector data type named "svec", which
gives compressed storage of sparse vectors with many duplicate elements.
When we use arrays of floating point numbers for various calculations,
we will sometimes have long runs of zeros (or some other default value).
This is common in applications like scientific computing,
retail optimization, and text processing. Each floating point number takes
8 bytes of storage in memory and/or disk, so saving those zeros is often
worthwhile. There are also many computations that can benefit from skipping
over the zeros.
To focus the discussion, consider, for example, the following
array of doubles stored as a Postgres/GP "float8[]" data type:
\code
'{0, 33,...40,000 zeros..., 12, 22 }'::float8[].
\endcode
This array would occupy slightly more than 320KB of memory/disk, most of
it zeros. Even if we were to exploit the null bitmap and store the zeros
as nulls, we would still end up with a 5KB null bitmap, which is still
not nearly as memory efficient as we'd like. Also, as we perform various
operations on the array, we'll often be doing work on 40,000 fields that
would turn out not to be important.
To solve the problems associated with the processing of sparse vectors
discussed above, we adopt a simple Run Length Encoding (RLE) scheme to
represent sparse vectors as pairs of count-value arrays. So, for example,
the array above would be represented as follows
\code
'{1,1,40000,1,1}:{0,33,0,12,22}'::madlib.svec,
\endcode
which says there is 1 occurrence of 0, followed by 1 occurrence of 33,
followed by 40,000 occurrences of 0, etc. In contrast to the naive
representations, we only need 5 integers and 5 floating point numbers
to store the array. Further, it is easy to implement vector operations
that can take advantage of the RLE representation to make computations
faster. The module provides a library of such functions.
The current version only supports sparse vectors of float8
values. Future versions will support other base types.
@usage
Syntax reference can be found in gp_svec.sql_in.
Users need to add madlib to their search_path to use the svec operators
defined in the module.
@examp
We can input an array directly as an svec as follows:
\code
testdb=# select '{1,1,40000,1,1}:{0,33,0,12,22}'::madlib.svec;
\endcode
We can also cast an array into an svec:
\code
testdb=# select ('{0,33,...40,000 zeros...,12,22}'::float8[])::madlib.svec;
\endcode
We can use operations with svec type like <, >, *, **, /, =, +, SUM, etc,
and they have meanings associated with typical vector operations. For
example, the plus (+) operator adds each of the terms of two vectors having
the same dimension together.
\code
testdb=# select ('{0,1,5}'::float8[]::madlib.svec + '{4,3,2}'::float8[]::madlib.svec)::float8[];
float8
---------
{4,4,7}
\endcode
Without the casting into float8[] at the end, we get:
\code
testdb=# select '{0,1,5}'::float8[]::madlib.svec + '{4,3,2}'::float8[]::madlib.svec;
?column?
----------
{2,1}:{4,7}
\endcode
A dot product (%*%) between the two vectors will result in a scalar
result of type float8. The dot product should be (0*4 + 1*3 + 5*2) = 13,
like this:
\code
testdb=# select '{0,1,5}'::float8[]::madlib.svec %*% '{4,3,2}'::float8[]::madlib.svec;
?column?
----------
13
\endcode
Special vector aggregate functions are also available. SUM is self
explanatory. VEC_COUNT_NONZERO evaluates the count of non-zero terms
in each column found in a set of n-dimensional svecs and returns an
svec with the counts. For instance, if we have the vectors {0,1,5},
{10,0,3},{0,0,3},{0,1,0}, then executing the VEC_COUNT_NONZERO() aggregate
function would result in {1,2,3}:
\code
testdb=# create table list (a madlib.svec);
testdb=# insert into list values ('{0,1,5}'::float8[]), ('{10,0,3}'::float8[]), ('{0,0,3}'::float8[]),('{0,1,0}'::float8[]);
testdb=# select madlib.vec_count_nonzero(a)::float8[] from list;
vec_count_nonzero
-----------------
{1,2,3}
\endcode
We do not use null bitmaps in the svec data type. A null value in an svec
is represented explicitly as an NVP (No Value Present) value. For example,
we have:
\code
testdb=# select '{1,2,3}:{4,null,5}'::madlib.svec;
svec
-------------------
{1,2,3}:{4,NVP,5}
testdb=# select '{1,2,3}:{4,null,5}'::madlib.svec + '{2,2,2}:{8,9,10}'::madlib.svec;
?column?
--------------------------
{1,2,1,2}:{12,NVP,14,15}
\endcode
An element of an svec can be accessed using the svec_proj() function,
which takes an svec and the index of the element desired.
\code
testdb=# select madlib.svec_proj('{1,2,3}:{4,5,6}'::madlib.svec, 1) + madlib.svec_proj('{4,5,6}:{1,2,3}'::madlib.svec, 15);
?column?
----------
7
\endcode
A subvector of an svec can be accessed using the svec_subvec() function,
which takes an svec and the start and end index of the subvector desired.
\code
testdb=# select madlib.svec_subvec('{2,4,6}:{1,3,5}'::madlib.svec, 2, 11);
svec_subvec
-----------------
{1,4,5}:{1,3,5}
\endcode
The elements/subvector of an svec can be changed using the function
svec_change(). It takes three arguments: an m-dimensional svec sv1, a
start index j, and an n-dimensional svec sv2 such that j + n - 1 <= m,
and returns an svec like sv1 but with the subvector sv1[j:j+n-1]
replaced by sv2. An example follows:
\code
testdb=# select madlib.svec_change('{1,2,3}:{4,5,6}'::madlib.svec,3,'{2}:{3}'::madlib.svec);
svec_change
---------------------
{1,1,2,2}:{4,5,3,6}
\endcode
There are also higher-order functions for processing svecs. For example,
the following is the corresponding function for lapply() in R.
\code
testdb=# select madlib.svec_lapply('sqrt', '{1,2,3}:{4,5,6}'::madlib.svec);
svec_lapply
-----------------------------------------------
{1,2,3}:{2,2.23606797749979,2.44948974278318}
\endcode
The full list of functions available for operating on svecs are available
in gp_svec.sql.
Other examples of svecs usage can be found in the k-means module.
@sa file gp_svec.sql_in (documenting the SQL functions)
@internal
@sa file sparse_vector.c (documenting the implementation in C)
@endinternal
*/
--! @file gp_svec.sql_in
--!
-- DROP SCHEMA MADLIB_SCHEMA CASCADE;
-- CREATE SCHEMA MADLIB_SCHEMA;
-- DROP TYPE IF EXISTS MADLIB_SCHEMA.svec CASCADE;
CREATE TYPE MADLIB_SCHEMA.svec;
--! SVEC constructor from CSTRING.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_in(cstring)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT;
--! Converts SVEC to CSTRING.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_out(MADLIB_SCHEMA.svec)
RETURNS cstring
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT;
--! Converts SVEC internal representation to SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_recv(internal)
RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT;
--! Converts SVEC to BYTEA.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_send(MADLIB_SCHEMA.svec)
RETURNS bytea
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT;
CREATE TYPE MADLIB_SCHEMA.svec (
internallength = VARIABLE,
input = MADLIB_SCHEMA.svec_in,
output = MADLIB_SCHEMA.svec_out,
send = MADLIB_SCHEMA.svec_send,
receive = MADLIB_SCHEMA.svec_recv,
storage=EXTENDED,
alignment = double
);
--! Basic floating point scalar operator: MIN.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dmin(float8,float8) RETURNS float8 AS 'MODULE_PATHNAME', 'float8_min' LANGUAGE C IMMUTABLE;
--! Basic floating point scalar operator: MAX.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dmax(float8,float8) RETURNS float8 AS 'MODULE_PATHNAME', 'float8_max' LANGUAGE C IMMUTABLE;
--! Counts the number of non-zero entries in the input vector; the second argument is capped at 1, then added to the first; used as the sfunc in the vec_count_nonzero() aggregate below.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_count(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec
AS 'MODULE_PATHNAME', 'svec_count' STRICT LANGUAGE C IMMUTABLE;
--! Adds two SVECs together, element by element.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_plus(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_plus' STRICT LANGUAGE C IMMUTABLE;
--! Minus second SVEC from the first, element by element.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_minus(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_minus' STRICT LANGUAGE C IMMUTABLE;
--! Computes the logarithm of each element of the input SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.log(MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_log' STRICT LANGUAGE C IMMUTABLE;
--! Divides the first SVEC by the second, element by element.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_div(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_div' STRICT LANGUAGE C IMMUTABLE;
--! Multiplies two SVEVs together, element by element.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_mult(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_mult' STRICT LANGUAGE C IMMUTABLE;
--! Raises each element of the first SVEC to the power given by second SVEC, which must have dimension 1 (a scalar).
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_pow(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_pow' STRICT LANGUAGE C IMMUTABLE;
--! Returns true if two SVECs are equal
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_eq(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS boolean AS 'MODULE_PATHNAME', 'svec_eq' STRICT LANGUAGE C IMMUTABLE;
--! Returns true if two float8 arrays are equal
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_eq(float8[],float8[]) RETURNS boolean AS 'MODULE_PATHNAME', 'float8arr_equals' LANGUAGE C IMMUTABLE;
--! Minus second array from the first array, element by element.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_minus_float8arr(float8[],float8[]) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'float8arr_minus_float8arr' LANGUAGE C IMMUTABLE;
--! Minus second SVEC from the first array, element by element.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_minus_svec(float8[],MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'float8arr_minus_svec' LANGUAGE C IMMUTABLE;
--! Minus second array from the first SVEC, element by element.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_minus_float8arr(MADLIB_SCHEMA.svec,float8[]) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_minus_float8arr' LANGUAGE C IMMUTABLE;
--! Adds two arrays together, element by element.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_plus_float8arr(float8[],float8[]) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'float8arr_plus_float8arr' LANGUAGE C IMMUTABLE;
--! Adds an array and an SVEC, element by element.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_plus_svec(float8[],MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'float8arr_plus_svec' LANGUAGE C IMMUTABLE;
--! Adds an SVEC and an array, element by element.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_plus_float8arr(MADLIB_SCHEMA.svec,float8[]) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_plus_float8arr' LANGUAGE C IMMUTABLE;
--! Multiplies two float8 arrays, element by element.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_mult_float8arr(float8[],float8[]) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'float8arr_mult_float8arr' LANGUAGE C IMMUTABLE;
--! Multiplies an array and an SVEC, element by element.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_mult_svec(float8[],MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'float8arr_mult_svec' LANGUAGE C IMMUTABLE;
--! Multiplies an SVEC and an array, element by element.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_mult_float8arr(MADLIB_SCHEMA.svec,float8[]) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_mult_float8arr' LANGUAGE C IMMUTABLE;
--! Divides a float8 array by another, element by element.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_div_float8arr(float8[],float8[]) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'float8arr_div_float8arr' LANGUAGE C IMMUTABLE;
--! Divides a float8 array by an SVEC, element by element.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_div_svec(float8[],MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'float8arr_div_svec' LANGUAGE C IMMUTABLE;
--! Divides an SVEC by a float8 array, element by element.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_div_float8arr(MADLIB_SCHEMA.svec,float8[]) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_div_float8arr' LANGUAGE C IMMUTABLE;
--! Computes the dot product of two SVECs.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dot(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_dot' STRICT LANGUAGE C IMMUTABLE;
--! Computes the dot product of two float8 arrays.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dot(float8[],float8[]) RETURNS float8 AS 'MODULE_PATHNAME', 'float8arr_dot' STRICT LANGUAGE C IMMUTABLE;
--! Computes the dot product of an SVEC and a float8 array.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dot(MADLIB_SCHEMA.svec,float8[]) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_dot_float8arr' STRICT LANGUAGE C IMMUTABLE;
--! Computes the dot product of a float8 array and an SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dot(float8[],MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'float8arr_dot_svec' STRICT LANGUAGE C IMMUTABLE;
--! Computes the l2norm of an SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.l2norm(MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_l2norm' STRICT LANGUAGE C IMMUTABLE;
--! Computes the l2norm of a float8 array.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.l2norm(float8[]) RETURNS float8 AS 'MODULE_PATHNAME', 'float8arr_l2norm' LANGUAGE C IMMUTABLE;
--! Computes the l1norm of an SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.l1norm(MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_l1norm' STRICT LANGUAGE C IMMUTABLE;
--! Computes the l1norm of a float8 array.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.l1norm(float8[]) RETURNS float8 AS 'MODULE_PATHNAME', 'float8arr_l1norm' STRICT LANGUAGE C IMMUTABLE;
--! Unnests an SVEC into a table of uncompressed values
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.unnest(MADLIB_SCHEMA.svec) RETURNS setof float8 AS 'MODULE_PATHNAME', 'svec_unnest' LANGUAGE C IMMUTABLE;
--! Appends an element to the back of an SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.vec_pivot(MADLIB_SCHEMA.svec,float8) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_pivot' LANGUAGE C IMMUTABLE;
--! Sums the elements of an SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.vec_sum(MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_summate' STRICT LANGUAGE C IMMUTABLE;
--! Sums the elements of a float8 array.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.vec_sum(float8[]) RETURNS float8 AS 'MODULE_PATHNAME', 'float8arr_summate' STRICT LANGUAGE C IMMUTABLE;
--! Computes the median element of a float8 array.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.vec_median(float8[]) RETURNS float8 AS 'MODULE_PATHNAME', 'float8arr_median' STRICT LANGUAGE C IMMUTABLE;
--! Computes the median element of an SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.vec_median(MADLIB_SCHEMA.svec) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_median' STRICT LANGUAGE C IMMUTABLE;
--! Casts an int2 into an SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_int2(int2) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_cast_int2' STRICT LANGUAGE C IMMUTABLE;
--! Casts an int4 into an SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_int4(int4) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_cast_int4' STRICT LANGUAGE C IMMUTABLE;
--! Casts an int8 into an SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_int8(bigint) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_cast_int8' STRICT LANGUAGE C IMMUTABLE;
--! Casts a float4 into an SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_float4(float4) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_cast_float4' STRICT LANGUAGE C IMMUTABLE;
--! Casts a float8 into an SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_float8(float8) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_cast_float8' STRICT LANGUAGE C IMMUTABLE;
--! Casts a numeric into an SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_numeric(numeric) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_cast_numeric' STRICT LANGUAGE C IMMUTABLE;
--! Casts an int2 into a float8 array.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_cast_int2(int2) RETURNS float8[] AS 'MODULE_PATHNAME', 'float8arr_cast_int2' STRICT LANGUAGE C IMMUTABLE;
--! Casts an int4 into a float8 array.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_cast_int4(int4) RETURNS float8[] AS 'MODULE_PATHNAME', 'float8arr_cast_int4' STRICT LANGUAGE C IMMUTABLE;
--! Casts an int8 into a float8 array.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_cast_int8(bigint) RETURNS float8[] AS 'MODULE_PATHNAME', 'float8arr_cast_int8' STRICT LANGUAGE C IMMUTABLE;
--! Casts a float4 into a float8 array.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_cast_float4(float4) RETURNS float8[] AS 'MODULE_PATHNAME', 'float8arr_cast_float4' STRICT LANGUAGE C IMMUTABLE;
--! Casts a float8 into a float8 array.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_cast_float8(float8) RETURNS float8[] AS 'MODULE_PATHNAME', 'float8arr_cast_float8' STRICT LANGUAGE C IMMUTABLE;
--! Casts a numeric into a float8 array.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.float8arr_cast_numeric(numeric) RETURNS float8[] AS 'MODULE_PATHNAME', 'float8arr_cast_numeric' STRICT LANGUAGE C IMMUTABLE;
--! Casts a float8 into an SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_cast_float8arr(float8[]) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_cast_float8arr' STRICT LANGUAGE C IMMUTABLE;
--! Casts an SVEC into a float8 array.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_return_array(MADLIB_SCHEMA.svec) RETURNS float8[] AS 'MODULE_PATHNAME', 'svec_return_array' LANGUAGE C IMMUTABLE;
--! Concatenates two SVECs.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_concat(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_concat' LANGUAGE C IMMUTABLE;
--! Replicates n copies of an SVEC and concatenates them together.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_concat_replicate(int4,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_concat_replicate' LANGUAGE C IMMUTABLE;
--! Returns the dimension of an SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dimension(MADLIB_SCHEMA.svec) RETURNS integer AS 'MODULE_PATHNAME', 'svec_dimension' LANGUAGE C IMMUTABLE;
--! Applies a given function to each element of an SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_lapply(text,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_lapply' LANGUAGE C IMMUTABLE;
--! Appends a run-length block to the back of an SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_append(MADLIB_SCHEMA.svec,float8,int8) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_append' LANGUAGE C IMMUTABLE;
--! Projects onto an element of an SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_proj(MADLIB_SCHEMA.svec,int4) RETURNS float8 AS 'MODULE_PATHNAME', 'svec_proj' LANGUAGE C IMMUTABLE;
--! Extracts a subvector of an SVEC given the subvector's start and end indices.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_subvec(MADLIB_SCHEMA.svec,int4,int4) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_subvec' LANGUAGE C IMMUTABLE;
--! Reverses the elements of an SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_reverse(MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_reverse' LANGUAGE C IMMUTABLE;
--! Replaces the subvector of a given SVEC at a given start index with another SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_change(MADLIB_SCHEMA.svec,int4,MADLIB_SCHEMA.svec) RETURNS MADLIB_SCHEMA.svec AS 'MODULE_PATHNAME', 'svec_change' LANGUAGE C IMMUTABLE;
/*
DROP OPERATOR IF EXISTS || ( MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
DROP OPERATOR IF EXISTS - ( MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
DROP OPERATOR IF EXISTS + ( MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
DROP OPERATOR IF EXISTS / ( MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
DROP OPERATOR IF EXISTS %*% ( MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
DROP OPERATOR IF EXISTS * ( MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
DROP OPERATOR IF EXISTS ^ ( MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);
*/
CREATE OPERATOR MADLIB_SCHEMA.|| (
LEFTARG = MADLIB_SCHEMA.svec,
RIGHTARG = MADLIB_SCHEMA.svec,
PROCEDURE = MADLIB_SCHEMA.svec_concat
);
CREATE OPERATOR MADLIB_SCHEMA.- (
LEFTARG = MADLIB_SCHEMA.svec,
RIGHTARG = MADLIB_SCHEMA.svec,
PROCEDURE = MADLIB_SCHEMA.svec_minus
);
CREATE OPERATOR MADLIB_SCHEMA.+ (
LEFTARG = MADLIB_SCHEMA.svec,
RIGHTARG = MADLIB_SCHEMA.svec,
PROCEDURE = MADLIB_SCHEMA.svec_plus
);
CREATE OPERATOR MADLIB_SCHEMA./ (
LEFTARG = MADLIB_SCHEMA.svec,
RIGHTARG = MADLIB_SCHEMA.svec,
PROCEDURE = MADLIB_SCHEMA.svec_div
);
CREATE OPERATOR MADLIB_SCHEMA.%*% (
LEFTARG = MADLIB_SCHEMA.svec,
RIGHTARG = MADLIB_SCHEMA.svec,
PROCEDURE = MADLIB_SCHEMA.dot
);
CREATE OPERATOR MADLIB_SCHEMA.* (
LEFTARG = MADLIB_SCHEMA.svec,
RIGHTARG = MADLIB_SCHEMA.svec,
PROCEDURE = MADLIB_SCHEMA.svec_mult
);
CREATE OPERATOR MADLIB_SCHEMA.^ (
LEFTARG = MADLIB_SCHEMA.svec,
RIGHTARG = MADLIB_SCHEMA.svec,
PROCEDURE = MADLIB_SCHEMA.svec_pow
);
-- float8[] operators
-- DROP OPERATOR IF EXISTS = ( float8[], float8[]);
/*
DROP OPERATOR IF EXISTS %*% ( float8[], MADLIB_SCHEMA.svec);
DROP OPERATOR IF EXISTS %*% ( MADLIB_SCHEMA.svec, float8[]);
DROP OPERATOR IF EXISTS %*% ( float8[], float8[]);
DROP OPERATOR IF EXISTS - ( float8[], float8[]);
DROP OPERATOR IF EXISTS + ( float8[], float8[]);
DROP OPERATOR IF EXISTS * ( float8[], float8[]);
DROP OPERATOR IF EXISTS / ( float8[], float8[]);
DROP OPERATOR IF EXISTS - ( float8[], MADLIB_SCHEMA.svec);
DROP OPERATOR IF EXISTS + ( float8[], MADLIB_SCHEMA.svec);
DROP OPERATOR IF EXISTS * ( float8[], MADLIB_SCHEMA.svec);
DROP OPERATOR IF EXISTS / ( float8[], MADLIB_SCHEMA.svec);
DROP OPERATOR IF EXISTS - ( MADLIB_SCHEMA.svec, float8[]);
DROP OPERATOR IF EXISTS + ( MADLIB_SCHEMA.svec, float8[]);
DROP OPERATOR IF EXISTS * ( MADLIB_SCHEMA.svec, float8[]);
DROP OPERATOR IF EXISTS / ( MADLIB_SCHEMA.svec, float8[]);
*/
/*
CREATE OPERATOR MADLIB_SCHEMA.= (
leftarg = float8[],
rightarg = float8[],
procedure = MADLIB_SCHEMA.float8arr_eq,
commutator = = ,
-- negator = <> ,
restrict = eqsel, join = eqjoinsel
);
*/
CREATE OPERATOR MADLIB_SCHEMA.%*% (
LEFTARG = float8[],
RIGHTARG = float8[],
PROCEDURE = MADLIB_SCHEMA.dot
);
CREATE OPERATOR MADLIB_SCHEMA.%*% (
LEFTARG = float8[],
RIGHTARG = MADLIB_SCHEMA.svec,
PROCEDURE = MADLIB_SCHEMA.dot
);
CREATE OPERATOR MADLIB_SCHEMA.%*% (
LEFTARG = MADLIB_SCHEMA.svec,
RIGHTARG = float8[],
PROCEDURE = MADLIB_SCHEMA.dot
);
CREATE OPERATOR MADLIB_SCHEMA.- (
LEFTARG = float8[],
RIGHTARG = float8[],
PROCEDURE = MADLIB_SCHEMA.float8arr_minus_float8arr
);
CREATE OPERATOR MADLIB_SCHEMA.+ (
LEFTARG = float8[],
RIGHTARG = float8[],
PROCEDURE = MADLIB_SCHEMA.float8arr_plus_float8arr
);
CREATE OPERATOR MADLIB_SCHEMA.* (
LEFTARG = float8[],
RIGHTARG = float8[],
PROCEDURE = MADLIB_SCHEMA.float8arr_mult_float8arr
);
CREATE OPERATOR MADLIB_SCHEMA./ (
LEFTARG = float8[],
RIGHTARG = float8[],
PROCEDURE = MADLIB_SCHEMA.float8arr_div_float8arr
);
CREATE OPERATOR MADLIB_SCHEMA.- (
LEFTARG = float8[],
RIGHTARG = MADLIB_SCHEMA.svec,
PROCEDURE = MADLIB_SCHEMA.float8arr_minus_svec
);
CREATE OPERATOR MADLIB_SCHEMA.+ (
LEFTARG = float8[],
RIGHTARG = MADLIB_SCHEMA.svec,
PROCEDURE = MADLIB_SCHEMA.float8arr_plus_svec
);
CREATE OPERATOR MADLIB_SCHEMA.* (
LEFTARG = float8[],
RIGHTARG = MADLIB_SCHEMA.svec,
PROCEDURE = MADLIB_SCHEMA.float8arr_mult_svec
);
CREATE OPERATOR MADLIB_SCHEMA./ (
LEFTARG = float8[],
RIGHTARG = MADLIB_SCHEMA.svec,
PROCEDURE = MADLIB_SCHEMA.float8arr_div_svec
);
CREATE OPERATOR MADLIB_SCHEMA.- (
LEFTARG = MADLIB_SCHEMA.svec,
RIGHTARG = float8[],
PROCEDURE = MADLIB_SCHEMA.svec_minus_float8arr
);
CREATE OPERATOR MADLIB_SCHEMA.+ (
LEFTARG = MADLIB_SCHEMA.svec,
RIGHTARG = float8[],
PROCEDURE = MADLIB_SCHEMA.svec_plus_float8arr
);
CREATE OPERATOR MADLIB_SCHEMA.* (
LEFTARG = MADLIB_SCHEMA.svec,
RIGHTARG = float8[],
PROCEDURE = MADLIB_SCHEMA.svec_mult_float8arr
);
CREATE OPERATOR MADLIB_SCHEMA./ (
LEFTARG = MADLIB_SCHEMA.svec,
RIGHTARG = float8[],
PROCEDURE = MADLIB_SCHEMA.svec_div_float8arr
);
/*
DROP CAST IF EXISTS (int2 AS MADLIB_SCHEMA.svec) ;
DROP CAST IF EXISTS (integer AS MADLIB_SCHEMA.svec) ;
DROP CAST IF EXISTS (bigint AS MADLIB_SCHEMA.svec) ;
DROP CAST IF EXISTS (float4 AS MADLIB_SCHEMA.svec) ;
DROP CAST IF EXISTS (float8 AS MADLIB_SCHEMA.svec) ;
DROP CAST IF EXISTS (numeric AS MADLIB_SCHEMA.svec) ;
*/
CREATE CAST (int2 AS MADLIB_SCHEMA.svec) WITH FUNCTION MADLIB_SCHEMA.svec_cast_int2(int2) ; -- AS IMPLICIT;
CREATE CAST (integer AS MADLIB_SCHEMA.svec) WITH FUNCTION MADLIB_SCHEMA.svec_cast_int4(integer) ; -- AS IMPLICIT;
CREATE CAST (bigint AS MADLIB_SCHEMA.svec) WITH FUNCTION MADLIB_SCHEMA.svec_cast_int8(bigint) ; -- AS IMPLICIT;
CREATE CAST (float4 AS MADLIB_SCHEMA.svec) WITH FUNCTION MADLIB_SCHEMA.svec_cast_float4(float4) ; -- AS IMPLICIT;
CREATE CAST (float8 AS MADLIB_SCHEMA.svec) WITH FUNCTION MADLIB_SCHEMA.svec_cast_float8(float8) ; -- AS IMPLICIT;
CREATE CAST (numeric AS MADLIB_SCHEMA.svec) WITH FUNCTION MADLIB_SCHEMA.svec_cast_numeric(numeric) ; -- AS IMPLICIT;
/*
DROP CAST IF EXISTS (int2 AS float8[]) ;
DROP CAST IF EXISTS (integer AS float8[]) ;
DROP CAST IF EXISTS (bigint AS float8[]) ;
DROP CAST IF EXISTS (float4 AS float8[]) ;
DROP CAST IF EXISTS (float8 AS float8[]) ;
DROP CAST IF EXISTS (numeric AS float8[]) ;
*/
-- CREATE CAST (int2 AS float8[]) WITH FUNCTION MADLIB_SCHEMA.float8arr_cast_int2(int2) ; -- AS IMPLICIT;
-- CREATE CAST (integer AS float8[]) WITH FUNCTION MADLIB_SCHEMA.float8arr_cast_int4(integer) ; -- AS IMPLICIT;
-- CREATE CAST (bigint AS float8[]) WITH FUNCTION MADLIB_SCHEMA.float8arr_cast_int8(bigint) ; -- AS IMPLICIT;
-- CREATE CAST (float4 AS float8[]) WITH FUNCTION MADLIB_SCHEMA.float8arr_cast_float4(float4) ; -- AS IMPLICIT;
-- CREATE CAST (float8 AS float8[]) WITH FUNCTION MADLIB_SCHEMA.float8arr_cast_float8(float8) ; -- AS IMPLICIT;
-- CREATE CAST (numeric AS float8[]) WITH FUNCTION MADLIB_SCHEMA.float8arr_cast_numeric(numeric) ; -- AS IMPLICIT;
-- DROP CAST IF EXISTS (MADLIB_SCHEMA.svec AS float8[]) ;
-- DROP CAST IF EXISTS (float8[] AS MADLIB_SCHEMA.svec) ;
CREATE CAST (MADLIB_SCHEMA.svec AS float8[]) WITH FUNCTION MADLIB_SCHEMA.svec_return_array(MADLIB_SCHEMA.svec) ; -- AS IMPLICIT;
CREATE CAST (float8[] AS MADLIB_SCHEMA.svec) WITH FUNCTION MADLIB_SCHEMA.svec_cast_float8arr(float8[]) ; -- AS IMPLICIT;
-- DROP OPERATOR IF EXISTS = (MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec) ;
CREATE OPERATOR MADLIB_SCHEMA.= (
leftarg = MADLIB_SCHEMA.svec, rightarg = MADLIB_SCHEMA.svec, procedure = MADLIB_SCHEMA.svec_eq,
commutator = = ,
-- negator = <> ,
restrict = eqsel, join = eqjoinsel
);
--! Aggregate that provides the element-wise sum of a list of vectors.
--!
-- DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.sum(MADLIB_SCHEMA.svec);
CREATE AGGREGATE MADLIB_SCHEMA.sum (MADLIB_SCHEMA.svec) (
SFUNC = MADLIB_SCHEMA.svec_plus,
PREFUNC = MADLIB_SCHEMA.svec_plus,
INITCOND = '{1}:{0.}', -- Zero
STYPE = MADLIB_SCHEMA.svec
);
--! Aggregate that provides a tally of nonzero entries in a list of vectors.
--!
-- DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.vec_count_nonzero(MADLIB_SCHEMA.svec);
CREATE AGGREGATE MADLIB_SCHEMA.vec_count_nonzero (MADLIB_SCHEMA.svec) (
SFUNC = MADLIB_SCHEMA.svec_count,
PREFUNC = MADLIB_SCHEMA.svec_plus,
INITCOND = '{1}:{0.}', -- Zero
STYPE = MADLIB_SCHEMA.svec
);
--! Aggregate that turns a list of float8 values into an SVEC.
--!
-- DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.array_agg(float8);
CREATE AGGREGATE MADLIB_SCHEMA.array_agg (float8) (
SFUNC = MADLIB_SCHEMA.vec_pivot,
PREFUNC = MADLIB_SCHEMA.svec_concat,
STYPE = MADLIB_SCHEMA.svec
);
--! Aggregate that computes the median element of a list of float8 values.
--!
-- DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.median_inmemory(float8);
CREATE AGGREGATE MADLIB_SCHEMA.median_inmemory (float8) (
SFUNC = MADLIB_SCHEMA.vec_pivot,
PREFUNC = MADLIB_SCHEMA.svec_concat,
FINALFUNC = MADLIB_SCHEMA.vec_median,
STYPE = MADLIB_SCHEMA.svec
);
-- Comparisons based on L2 Norm
--! Returns true if the l2 norm of the first SVEC is less than that of the second SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2_lt(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS bool AS 'MODULE_PATHNAME', 'svec_l2_lt' LANGUAGE C IMMUTABLE;
--! Returns true if the l2 norm of the first SVEC is less than or equal to that of the second SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2_le(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS bool AS 'MODULE_PATHNAME', 'svec_l2_le' LANGUAGE C IMMUTABLE;
--! Returns true if the l2 norm of the first SVEC is equal to that of the second SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2_eq(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS bool AS 'MODULE_PATHNAME', 'svec_l2_eq' LANGUAGE C IMMUTABLE;
--! Returns true if the l2 norm of the first SVEC is not equal to that of the second SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2_ne(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS bool AS 'MODULE_PATHNAME', 'svec_l2_ne' LANGUAGE C IMMUTABLE;
--! Returns true if the l2 norm of the first SVEC is greater than that of the second SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2_gt(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS bool AS 'MODULE_PATHNAME', 'svec_l2_gt' LANGUAGE C IMMUTABLE;
--! Returns true if the l2 norm of the first SVEC is greater than or equal to that of the second SVEC.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2_ge(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS bool AS 'MODULE_PATHNAME', 'svec_l2_ge' LANGUAGE C IMMUTABLE;
--! Returns a value indicating the relative values of the l2 norms of two SVECs.
--!
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svec_l2_cmp(MADLIB_SCHEMA.svec,MADLIB_SCHEMA.svec) RETURNS integer AS 'MODULE_PATHNAME', 'svec_l2_cmp' LANGUAGE C IMMUTABLE;
/*
DROP OPERATOR IF EXISTS < (MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec) CASCADE ;
DROP OPERATOR IF EXISTS <= (MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec) CASCADE ;
DROP OPERATOR IF EXISTS <> (MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec) ;
DROP OPERATOR IF EXISTS == (MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec) CASCADE ;
DROP OPERATOR IF EXISTS > (MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec) CASCADE ;
DROP OPERATOR IF EXISTS >= (MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec) CASCADE ;
DROP OPERATOR IF EXISTS *|| (int4, MADLIB_SCHEMA.svec) ;
*/
CREATE OPERATOR MADLIB_SCHEMA.< (
leftarg = MADLIB_SCHEMA.svec, rightarg = MADLIB_SCHEMA.svec, procedure = MADLIB_SCHEMA.svec_l2_lt,
commutator = > , negator = >= ,
restrict = scalarltsel, join = scalarltjoinsel
);
CREATE OPERATOR MADLIB_SCHEMA.<= (
leftarg = MADLIB_SCHEMA.svec, rightarg = MADLIB_SCHEMA.svec, procedure = MADLIB_SCHEMA.svec_l2_le,
commutator = >= , negator = > ,
restrict = scalarltsel, join = scalarltjoinsel
);
CREATE OPERATOR MADLIB_SCHEMA.<> (
leftarg = MADLIB_SCHEMA.svec, rightarg = MADLIB_SCHEMA.svec, procedure = MADLIB_SCHEMA.svec_l2_eq,
commutator = <> ,
negator = =,
restrict = eqsel, join = eqjoinsel
);
CREATE OPERATOR MADLIB_SCHEMA.== (
leftarg = MADLIB_SCHEMA.svec, rightarg = MADLIB_SCHEMA.svec, procedure = MADLIB_SCHEMA.svec_l2_eq,
commutator = = ,
negator = <> ,
restrict = eqsel, join = eqjoinsel
);
CREATE OPERATOR MADLIB_SCHEMA.>= (
leftarg = MADLIB_SCHEMA.svec, rightarg = MADLIB_SCHEMA.svec, procedure = MADLIB_SCHEMA.svec_l2_ge,
commutator = <= , negator = < ,
restrict = scalargtsel, join = scalargtjoinsel
);
CREATE OPERATOR MADLIB_SCHEMA.> (
leftarg = MADLIB_SCHEMA.svec, rightarg = MADLIB_SCHEMA.svec, procedure = MADLIB_SCHEMA.svec_l2_gt,
commutator = < , negator = <= ,
restrict = scalargtsel, join = scalargtjoinsel
);
CREATE OPERATOR MADLIB_SCHEMA.*|| (
leftarg = int4, rightarg = MADLIB_SCHEMA.svec, procedure = MADLIB_SCHEMA.svec_concat_replicate
);
CREATE OPERATOR CLASS MADLIB_SCHEMA.svec_l2_ops
DEFAULT FOR TYPE MADLIB_SCHEMA.svec USING btree AS
OPERATOR 1 MADLIB_SCHEMA.< ,
OPERATOR 2 MADLIB_SCHEMA.<= ,
OPERATOR 3 MADLIB_SCHEMA.== ,
OPERATOR 4 MADLIB_SCHEMA.>= ,
OPERATOR 5 MADLIB_SCHEMA.> ,
FUNCTION 1 MADLIB_SCHEMA.svec_l2_cmp(MADLIB_SCHEMA.svec, MADLIB_SCHEMA.svec);