blob: 0ec864d585b4b2fce63c4f7e9352756eecaad370 [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* @file utilities.sql_in
*
* @brief SQL functions for carrying out routine tasks
*
* @sa For a brief overview of utility functions, see the
* module description \ref grp_utilities.
*
*//* ----------------------------------------------------------------------- */
m4_include(`SQLCommon.m4')
/**
@addtogroup grp_utilities
<div class="toc"><b>Contents</b>
<ul>
<li><a href="#utilities">Utility Functions</a></li>
<li><a href="#rel;ated">Related Topics</a></li>
</ul>
</div>
@brief Provides a collection of user-defined functions for performing common tasks in the database.
The utility module consists of useful utility functions to assist data
scientists in using the product. Several of these functions can be used
while implementing new algorithms.
@anchor utilities
@par Utility Functions
<table class="output">
<tr>
<th>version()</th>
<td>Return MADlib build information. </td>
</tr>
<tr>
<th>assert()</th>
<td>Raise an exception if the given condition is not satisfied.</td>
</tr>
<tr>
<th>check_if_raises_error()</th>
<td>Check if a SQL statement raises an error.</td>
</tr>
<tr>
<th>check_if_col_exists()</th>
<td>Check if a column exists in a table.</td>
</tr>
<tr>
<th>isnan()</th>
<td>Check if a floating-point number is NaN (not a number)</td>
</tr>
<tr>
<th>create_schema_pg_temp()</th>
<td>Create the temporary schema if it does not exist yet.</td>
</tr>
<tr>
<th>noop()</th>
<td>Create volatile noop function.</td>
</tr>
<tr>
<th>cleanup_madlib_temp_tables()</th>
<td>Drop all tables matching pattern '%madlib_temp%' in a given schema.</td>
</tr>
</table>
Note: If the function cleanup_madlib_temp_tables() gives an Out-of-memory error,
then the number of tables to be dropped is too high to execute in one transaction.
In such a case, please follow the instructions provided with the error to execute
the command in multiple transactions.
@anchor related
@par Related Topics
File utilities.sql_in documenting the SQL functions.
*/
/**
* @brief Drop all tables matching pattern '%madlib_temp%' in a given schema
*
* @param target_schema TEXT. The schema that takes affect.
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cleanup_madlib_temp_tables(
target_schema text
)
RETURNS void AS $$
PythonFunction(utilities, admin, cleanup_madlib_temp_tables)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cleanup_madlib_temp_tables_script(
target_schema text
)
RETURNS text AS $$
PythonFunction(utilities, admin, cleanup_madlib_temp_tables_script)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
/**
* @brief Return MADlib build information.
*
* @returns Summary of MADlib build information, consisting of MADlib version,
* git revision, cmake configuration time, build type, build system,
* C compiler, and C++ compiler
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.version()
RETURNS TEXT
AS $$
SELECT (
'MADlib version: __MADLIB_VERSION__, '
'git revision: __MADLIB_GIT_REVISION__, '
'cmake configuration time: __MADLIB_BUILD_TIME__, '
'build type: __MADLIB_BUILD_TYPE__, '
'build system: __MADLIB_BUILD_SYSTEM__, '
'C compiler: __MADLIB_C_COMPILER__, '
'C++ compiler: __MADLIB_CXX_COMPILER__')::TEXT
$$
LANGUAGE sql
IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
/**
* @brief Raise an exception if the given condition is not satisfied.
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.assert(condition BOOLEAN, msg VARCHAR)
RETURNS VOID
AS $$
BEGIN
IF NOT condition THEN
RAISE EXCEPTION 'Failed assertion: %', msg;
END IF;
END
$$
LANGUAGE plpgsql
IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
------------------------------------------------------------------------
/**
* @brief Compute the relative error of an approximate value
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.relative_error(
approx DOUBLE PRECISION,
value DOUBLE PRECISION
) RETURNS DOUBLE PRECISION
AS $$
SELECT abs(($1 - $2)/$2)
$$
LANGUAGE sql
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
------------------------------------------------------------------------
/**
* @brief Compute the relative error (w.r.t. the 2-norm) of an apprixmate vector
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.relative_error(
approx DOUBLE PRECISION[],
value DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION
LANGUAGE sql
AS $$
SELECT MADLIB_SCHEMA.dist_norm2($1, $2) / MADLIB_SCHEMA.norm2($2)
$$
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
------------------------------------------------------------------------
/**
* @brief Check if a SQL statement raises an error
*
* @param sql The SQL statement
* @returns \c TRUE if an exception is raised while executing \c sql, \c FALSE
* otherwise.
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.check_if_raises_error(
sql TEXT
) RETURNS BOOLEAN
AS $$
BEGIN
EXECUTE sql;
RETURN FALSE;
EXCEPTION
WHEN OTHERS THEN
RETURN TRUE;
END;
$$
LANGUAGE plpgsql
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
------------------------------------------------------------------------
/**
* @brief Check if a column exists in a table
*
* @param source_table Source table
* @param column_name Column name in the table
* @returns \c TRUE if it exsists and FALSE if not
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.check_if_col_exists(
source_table TEXT,
column_name TEXT
)
RETURNS BOOLEAN AS $$
DECLARE
sql TEXT;
input_table_name VARCHAR[];
actual_table_name VARCHAR;
schema_name VARCHAR;
BEGIN
input_table_name = regexp_split_to_array(source_table, E'\\.');
IF array_upper(input_table_name, 1) = 1 THEN
actual_table_name = input_table_name[1];
schema_name := current_schema();
ELSIF array_upper(input_table_name, 1) = 2 THEN
actual_table_name = input_table_name[2];
schema_name = input_table_name[1];
ELSE
RAISE EXCEPTION 'Incorrect input source table name provided';
END IF;
sql := 'SELECT MADLIB_SCHEMA.assert(count( column_name )>0, ''Error'') FROM information_schema.columns WHERE table_schema = ''' || schema_name || ''' AND table_name = ''' || actual_table_name || ''' AND column_name= ''' || column_name || '''';
raise notice '%', sql;
RETURN NOT MADLIB_SCHEMA.check_if_raises_error(sql);
END;
$$ LANGUAGE plpgsql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `READS SQL DATA', `');
------------------------------------------------------------------------
/**
* @brief Check if a floating-point number is NaN (not a number)
*
* This function exists for portability. Some DBMSs like PostgreSQL treat
* floating-point numbers as fully ordered -- contrary to IEEE 754. (See, e.g.,
* the <a href=
* "http://www.postgresql.org/docs/current/static/datatype-numeric.html#DATATYPE-FLOAT"
* >PostgreSQL documentation</a>. For portability, MADlib code should not make
* use of such "features" directly, but only use isnan() instead.
*
* @param number
* @returns \c TRUE if \c number is \c NaN, \c FALSE otherwise
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.isnan(
number DOUBLE PRECISION
) RETURNS BOOLEAN
AS $$
SELECT $1 = 'NaN'::DOUBLE PRECISION;
$$
LANGUAGE sql
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
------------------------------------------------------------------------
/**
* @brief Create the temporary schema if it does not exist yet
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.create_schema_pg_temp()
RETURNS VOID
LANGUAGE plpgsql
VOLATILE
AS $$
BEGIN
-- pg_my_temp_schema() is a built-in function
IF pg_my_temp_schema() = 0 THEN
-- The pg_temp schema does not exist, yet. Creating a temporary table
-- will create it. Note: There is *no* race condition here, because
-- every session has its own temp schema.
EXECUTE 'CREATE TEMPORARY TABLE _madlib_temp_table AS SELECT 1;
DROP TABLE pg_temp._madlib_temp_table CASCADE;';
END IF;
END;
$$
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
------------------------------------------------------------------------
/**
* @brief Create volatile noop function
*
* The only use of this function is as an optimization fence when used in the
* SELECT list of a query. See, e.g.,
* http://archives.postgresql.org/pgsql-sql/2012-07/msg00030.php
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.noop()
RETURNS VOID
AS 'MODULE_PATHNAME'
LANGUAGE c
VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
------------------------------------------------------------------------
/*
* Create type bytea8 with 8-byte alignment.
*/
m4_ifdef(`__UDT_NOT_ALLOWED__', `', `
CREATE TYPE MADLIB_SCHEMA.bytea8;
')
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.bytea8in(cstring)
RETURNS MADLIB_SCHEMA.bytea8 AS 'byteain'
LANGUAGE internal IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.bytea8out(MADLIB_SCHEMA.bytea8)
RETURNS cstring AS 'byteaout'
LANGUAGE internal IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.bytea8recv(internal)
RETURNS MADLIB_SCHEMA.bytea8 AS 'bytearecv'
LANGUAGE internal IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.bytea8send(MADLIB_SCHEMA.bytea8)
RETURNS bytea AS 'byteasend'
LANGUAGE internal IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
m4_ifdef(`__UDT_NOT_ALLOWED__', `', `
CREATE TYPE MADLIB_SCHEMA.bytea8(
INPUT = MADLIB_SCHEMA.bytea8in,
OUTPUT = MADLIB_SCHEMA.bytea8out,
RECEIVE = MADLIB_SCHEMA.bytea8recv,
SEND = MADLIB_SCHEMA.bytea8send,
ALIGNMENT = double,
STORAGE = external
);
')
/**
* @brief Get all column names except dependent variable
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__internal_get_col_names_except_dep_variable(
source_table VARCHAR -- name of input table
, dependent_varname VARCHAR -- name of dependent variable
)
RETURNS VARCHAR AS $$
DECLARE
col_names VARCHAR[];
BEGIN
EXECUTE 'SELECT ARRAY(SELECT DISTINCT column_name::varchar from ' ||
' information_schema.columns WHERE ' ||
'column_name NOT LIKE ''' || dependent_varname || '''' ||
'AND table_name LIKE ''' || source_table || ''')'
INTO col_names;
RETURN 'ARRAY[' || array_to_string(col_names, ',') || ']';
END;
$$ LANGUAGE plpgsql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `READS SQL DATA', `');
------------------------------------------------------------------------
/**
* @brief Generate random remporary names for temp table and other names
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__unique_string ()
RETURNS VARCHAR AS $$
PythonFunction(utilities, utilities, unique_string)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
------------------------------------------------------------------------
/**
* @brief Takes a string of comma separated values and puts it into an array
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA._string_to_array (
s VARCHAR
)
RETURNS VARCHAR[] AS $$
PythonFunction(utilities, utilities, _string_to_sql_array)
$$ LANGUAGE plpythonu IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
------------------------------------------------------------------------
/**
* @brief Cast boolean into text
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.bool_to_text (BOOLEAN)
RETURNS TEXT
STRICT
LANGUAGE SQL AS '
SELECT CASE
WHEN $1 THEN ''t''
ELSE ''f''
END;
'm4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
------------------------------------------------------------------------
/**
* @brief Cast any value to text.
*
* @param val A value with any specific type.
*
* @return The text format string for the value.
*
* @note Greenplum doesn't support bit/boolean to text casting.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__to_char(val anyelement)
RETURNS TEXT
AS 'MODULE_PATHNAME', '__to_text'
LANGUAGE C STRICT IMMUTABLE;
------------------------------------------------------------------------
/*
* An array_agg() function is defined in module array_ops (to compatibility with
* GP 4.0.
*/
------------Added for the in-memory group iteration controller----------
DROP TYPE IF EXISTS MADLIB_SCHEMA._grp_state_type CASCADE;
CREATE TYPE MADLIB_SCHEMA._grp_state_type AS(
grp_key TEXT,
iteration INTEGER,
state DOUBLE PRECISION[]
);
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA._gen_state(
grp_key TEXT[],
iteration INTEGER[],
state DOUBLE PRECISION[])
RETURNS SETOF MADLIB_SCHEMA._grp_state_type AS $$
num_grp = len(grp_key)
if num_grp == 0:
return
num_var = len(state) / num_grp
for i in range(num_grp):
if iteration is None:
yield (grp_key[i], None, state[i * num_var : (i + 1) * num_var] if len(state) > 0 else None)
else:
yield (grp_key[i], iteration[i], state[i * num_var : (i + 1) * num_var] if len(state) > 0 else None)
$$ LANGUAGE plpythonu
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA._final_mode(double precision[])
RETURNS double precision AS
$BODY$
SELECT a
FROM unnest($1) a
GROUP BY 1
ORDER BY COUNT(1) DESC, 1
LIMIT 1;
$BODY$
LANGUAGE 'sql' IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
-- Tell Postgres how to use our aggregate
DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.mode(double precision) CASCADE;
CREATE AGGREGATE MADLIB_SCHEMA.mode(double precision) (
SFUNC=array_append, --Function to call for each row. Just builds the array
STYPE=double precision[],
FINALFUNC=MADLIB_SCHEMA._final_mode, --Function to call after everything has been added to array
INITCOND='{}' --Initialize an empty array when starting
);