blob: 23abb40a449ddc4fcd2ce00ede6b9e1714acaacb [file] [log] [blame]
/* ----------------------------------------------------------------------- */
/**
*
* @file utilities.sql_in
*
* @brief SQL functions for carrying out routine tasks
*
* @sa For a brief overview of utility functions, see the
* module description \ref grp_utilities.
*
**/
/* ----------------------------------------------------------------------- */
m4_include(`SQLCommon.m4')
/**
@addtogroup grp_utilities
<div class="toc"><b>Contents</b>
<ul>
<li><a href="#utilities">Utility Functions</a></li>
<li><a href="#rel;ated">Related Topics</a></li>
</ul>
</div>
@brief Provides a collection of user-defined functions for performing common tasks in the database.
Database functions are a collection of lower level utilities
to assist data scientists and others in using MADlib.
@anchor utilities
@par Utility Functions
<table class="output">
<tr>
<th>version()</th>
<td>Return MADlib build information. </td>
</tr>
<tr>
<th>assert()</th>
<td>Raise an exception if the given condition is not satisfied.</td>
</tr>
<tr>
<th>check_if_raises_error()</th>
<td>Check if a SQL statement raises an error.</td>
</tr>
<tr>
<th>check_if_col_exists()</th>
<td>Check if a column exists in a table.</td>
</tr>
<tr>
<th>isnan()</th>
<td>Check if a floating-point number is NaN (not a number)</td>
</tr>
<tr>
<th>create_schema_pg_temp()</th>
<td>Create the temporary schema if it does not exist yet.</td>
</tr>
<tr>
<th>noop()</th>
<td>Create volatile noop function.</td>
</tr>
<tr>
<th>cleanup_madlib_temp_tables()</th>
<td>Drop all tables matching pattern '%madlib_temp%' in a given schema.</td>
</tr>
<tr>
<th>dropcols()</th>
<td>Create a new table with a subset of the columns dropped from a source table.
</td>
</tr>
</table>
Note: If the function cleanup_madlib_temp_tables() gives an Out-of-memory error,
then the number of tables to be dropped is too high to execute in one transaction.
In such a case, please follow the instructions provided with the error to execute
the command in multiple transactions.
@anchor related
@par Related Topics
File utilities.sql_in documenting the SQL functions.
*/
/**
* @brief Drop all tables matching pattern '%madlib_temp%' in a given schema
*
* @param target_schema TEXT. The schema that takes affect.
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cleanup_madlib_temp_tables(
target_schema text
)
RETURNS void AS $$
PythonFunction(utilities, admin, cleanup_madlib_temp_tables)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cleanup_madlib_temp_tables_script(
target_schema text
)
RETURNS text AS $$
PythonFunction(utilities, admin, cleanup_madlib_temp_tables_script)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cleanup_madlib_temp_views(
target_schema text
)
RETURNS void AS $$
PythonFunction(utilities, admin, cleanup_madlib_temp_views)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
/**
* @brief Return MADlib build information.
*
* @returns Summary of MADlib build information, consisting of MADlib version,
* git revision, cmake configuration time, build type, build system,
* C compiler, and C++ compiler
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.version()
RETURNS TEXT
AS $$
SELECT (
'MADlib version: __MADLIB_VERSION__, '
'git revision: __MADLIB_GIT_REVISION__, '
'cmake configuration time: __MADLIB_BUILD_TIME__, '
'build type: __MADLIB_BUILD_TYPE__, '
'build system: __MADLIB_BUILD_SYSTEM__, '
'C compiler: __MADLIB_C_COMPILER__, '
'C++ compiler: __MADLIB_CXX_COMPILER__')::TEXT
$$
LANGUAGE sql
IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
/**
* @brief Raise an exception if the given condition is not satisfied.
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.assert(condition BOOLEAN, msg VARCHAR)
RETURNS VOID
AS $$
BEGIN
IF NOT condition THEN
RAISE EXCEPTION 'Failed assertion: %', msg;
END IF;
END
$$
LANGUAGE plpgsql
IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
------------------------------------------------------------------------
/**
* @brief Compute the relative error of an approximate value
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.relative_error(
approx DOUBLE PRECISION,
value DOUBLE PRECISION
) RETURNS DOUBLE PRECISION
AS $$
SELECT abs(($1 - $2)/$2)
$$
LANGUAGE sql
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
------------------------------------------------------------------------
/**
* @brief Compute the relative error (w.r.t. the 2-norm) of an apprixmate vector
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.relative_error(
approx DOUBLE PRECISION[],
value DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION
LANGUAGE sql
AS $$
SELECT MADLIB_SCHEMA.dist_norm2($1, $2) / MADLIB_SCHEMA.norm2($2)
$$
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
------------------------------------------------------------------------
/**
* @brief Check if a SQL statement raises an error
*
* @param sql The SQL statement
* @returns \c TRUE if an exception is raised while executing \c sql, \c FALSE
* otherwise.
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.check_if_raises_error(
sql TEXT
) RETURNS BOOLEAN
AS $$
BEGIN
EXECUTE sql;
RETURN FALSE;
EXCEPTION
WHEN OTHERS THEN
RETURN TRUE;
END;
$$
LANGUAGE plpgsql
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
------------------------------------------------------------------------
/**
* @brief Check if a column exists in a table
*
* @param source_table Source table
* @param column_name Column name in the table
* @returns \c TRUE if it exists and FALSE if not
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.check_if_col_exists(
source_table TEXT,
column_name TEXT
)
RETURNS BOOLEAN AS $$
DECLARE
sql TEXT;
input_table_name VARCHAR[];
actual_table_name VARCHAR;
schema_name VARCHAR;
BEGIN
input_table_name = regexp_split_to_array(source_table, E'\\.');
IF array_upper(input_table_name, 1) = 1 THEN
actual_table_name = input_table_name[1];
schema_name := current_schema();
ELSIF array_upper(input_table_name, 1) = 2 THEN
actual_table_name = input_table_name[2];
schema_name = input_table_name[1];
ELSE
RAISE EXCEPTION 'Incorrect input source table name provided';
END IF;
sql := 'SELECT MADLIB_SCHEMA.assert(count( column_name )>0, ''Error'') FROM information_schema.columns WHERE table_schema = ''' || schema_name || ''' AND table_name = ''' || actual_table_name || ''' AND column_name= ''' || column_name || '''';
raise notice '%', sql;
RETURN NOT MADLIB_SCHEMA.check_if_raises_error(sql);
END;
$$ LANGUAGE plpgsql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `READS SQL DATA', `');
------------------------------------------------------------------------
/**
* @brief Check if a floating-point number is NaN (not a number)
*
* This function exists for portability. Some DBMSs like PostgreSQL treat
* floating-point numbers as fully ordered -- contrary to IEEE 754. (See, e.g.,
* the <a href=
* "http://www.postgresql.org/docs/current/static/datatype-numeric.html#DATATYPE-FLOAT"
* >PostgreSQL documentation</a>. For portability, MADlib code should not make
* use of such "features" directly, but only use isnan() instead.
*
* @param number
* @returns \c TRUE if \c number is \c NaN, \c FALSE otherwise
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.isnan(
number DOUBLE PRECISION
) RETURNS BOOLEAN
AS $$
SELECT $1 = 'NaN'::DOUBLE PRECISION;
$$
LANGUAGE sql
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
------------------------------------------------------------------------
/**
* @brief Create the temporary schema if it does not exist yet
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.create_schema_pg_temp()
RETURNS VOID
LANGUAGE plpgsql
VOLATILE
AS $$
BEGIN
-- pg_my_temp_schema() is a built-in function
IF pg_my_temp_schema() = 0 THEN
-- The pg_temp schema does not exist, yet. Creating a temporary table
-- will create it. Note: There is *no* race condition here, because
-- every session has its own temp schema.
EXECUTE 'CREATE TEMPORARY TABLE _madlib_temp_table AS SELECT 1;
DROP TABLE pg_temp._madlib_temp_table CASCADE;';
END IF;
END;
$$
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
------------------------------------------------------------------------
/**
* @brief Create volatile noop function
*
* The only use of this function is as an optimization fence when used in the
* SELECT list of a query. See, e.g.,
* http://archives.postgresql.org/pgsql-sql/2012-07/msg00030.php
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.noop()
RETURNS VOID
AS 'MODULE_PATHNAME'
LANGUAGE c
VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
------------------------------------------------------------------------
/*
* Create type bytea8 with 8-byte alignment.
*/
m4_ifdef(`__UDT_NOT_ALLOWED__', `', `
CREATE TYPE MADLIB_SCHEMA.bytea8;
')
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.bytea8in(cstring)
RETURNS MADLIB_SCHEMA.bytea8 AS 'byteain'
LANGUAGE internal IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.bytea8out(MADLIB_SCHEMA.bytea8)
RETURNS cstring AS 'byteaout'
LANGUAGE internal IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.bytea8recv(internal)
RETURNS MADLIB_SCHEMA.bytea8 AS 'bytearecv'
LANGUAGE internal IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.bytea8send(MADLIB_SCHEMA.bytea8)
RETURNS bytea AS 'byteasend'
LANGUAGE internal IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
m4_ifdef(`__UDT_NOT_ALLOWED__', `', `
CREATE TYPE MADLIB_SCHEMA.bytea8(
INPUT = MADLIB_SCHEMA.bytea8in,
OUTPUT = MADLIB_SCHEMA.bytea8out,
RECEIVE = MADLIB_SCHEMA.bytea8recv,
SEND = MADLIB_SCHEMA.bytea8send,
ALIGNMENT = double,
STORAGE = external
);
')
/**
* @brief Get all column names except dependent variable
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__internal_get_col_names_except_dep_variable(
source_table VARCHAR -- name of input table
, dependent_varname VARCHAR -- name of dependent variable
)
RETURNS VARCHAR AS $$
DECLARE
col_names VARCHAR[];
BEGIN
EXECUTE 'SELECT ARRAY(SELECT DISTINCT column_name::varchar from ' ||
' information_schema.columns WHERE ' ||
'column_name NOT LIKE ''' || dependent_varname || '''' ||
'AND table_name LIKE ''' || source_table || ''')'
INTO col_names;
RETURN 'ARRAY[' || array_to_string(col_names, ',') || ']';
END;
$$ LANGUAGE plpgsql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `READS SQL DATA', `');
------------------------------------------------------------------------
/**
* @brief Generate random remporary names for temp table and other names
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__unique_string ()
RETURNS VARCHAR AS $$
PythonFunction(utilities, utilities, unique_string)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
------------------------------------------------------------------------
/**
* @brief Takes a string of comma separated values and puts it into an array
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA._string_to_array (
s VARCHAR
)
RETURNS VARCHAR[] AS $$
PythonFunction(utilities, utilities, _string_to_sql_array)
$$ LANGUAGE plpythonu IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
------------------------------------------------------------------------
/**
* @brief Cast boolean into text
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.bool_to_text (BOOLEAN)
RETURNS TEXT
STRICT
LANGUAGE SQL AS $$
SELECT CASE
WHEN $1
THEN 'true'
ELSE 'false'
END;
$$m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
------------------------------------------------------------------------
/**
* @brief Cast any value to text.
*
* @param val A value with any specific type.
*
* @return The text format string for the value.
*
* @note Greenplum doesn't support bit/boolean to text casting.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__to_char(val anyelement)
RETURNS TEXT
AS 'MODULE_PATHNAME', '__to_text'
LANGUAGE C STRICT IMMUTABLE;
------------------------------------------------------------------------
/*
* An array_agg() function is defined in module array_ops (to compatibility with
* GP 4.0.
*/
------------Added for the in-memory group iteration controller----------
DROP TYPE IF EXISTS MADLIB_SCHEMA._grp_state_type CASCADE;
CREATE TYPE MADLIB_SCHEMA._grp_state_type AS(
grp_key TEXT,
iteration INTEGER,
state DOUBLE PRECISION[]
);
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA._gen_state(
grp_key TEXT[],
iteration INTEGER[],
state DOUBLE PRECISION[])
RETURNS SETOF MADLIB_SCHEMA._grp_state_type AS $$
num_grp = len(grp_key)
if num_grp == 0:
return
num_var = len(state) / num_grp
for i in range(num_grp):
if iteration is None:
yield (grp_key[i], None, state[i * num_var : (i + 1) * num_var] if len(state) > 0 else None)
else:
yield (grp_key[i], iteration[i], state[i * num_var : (i + 1) * num_var] if len(state) > 0 else None)
$$ LANGUAGE plpythonu
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA._final_mode(double precision[])
RETURNS double precision AS
$BODY$
SELECT a
FROM unnest($1) a
GROUP BY 1
ORDER BY COUNT(1) DESC, 1
LIMIT 1;
$BODY$
LANGUAGE 'sql' IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
-- Tell Postgres how to use our aggregate
DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.mode(double precision) CASCADE;
CREATE AGGREGATE MADLIB_SCHEMA.mode(double precision) (
SFUNC=array_append, --Function to call for each row. Just builds the array
STYPE=double precision[],
FINALFUNC=MADLIB_SCHEMA._final_mode, --Function to call after everything has been added to array
INITCOND='{}' --Initialize an empty array when starting
);
------------------------------------------------------------------------
/**
* @brief Creates a new table with a subset of columns dropped from another
* source table.
*
* @param source_table Name of the table containing the source data.
* @param out_table Name of the generated table containing the output.
If a table with the same name already exists, an error will be returned.
* @param cols_to_drop Comma-separated string of column names from the source
* table to drop. An error is returned if the output table does not
* contain any columns or if a requested column is not present in the
* source table.
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dropcols (
source_table VARCHAR,
out_table VARCHAR,
cols_to_drop VARCHAR
)
RETURNS void AS $$
PythonFunctionBodyOnly(utilities, utilities)
from utilities import control
with control.MinWarning('error'):
return utilities.create_table_drop_cols(source_table, out_table, cols_to_drop)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.is_pg_major_version_less_than(
compare_version INTEGER)
RETURNS BOOLEAN AS $$
PythonFunction(utilities, utilities, is_pg_major_version_less_than)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
-- Function to trap errors while running sql in plpy
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.trap_error(
stmt TEXT
)
RETURNS INTEGER AS $$
BEGIN
BEGIN
EXECUTE stmt;
EXCEPTION
WHEN OTHERS THEN
RETURN 1;
END;
RETURN 0;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.test_error_msg(
stmt TEXT,
msg TEXT
)
RETURNS BOOLEAN AS $$
try:
plpy.execute(stmt)
return TRUE
except Exception as ex:
return msg in ex.message
$$ LANGUAGE plpythonu;
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.test_input_table(
stmt TEXT
)
RETURNS BOOLEAN AS $$
SELECT MADLIB_SCHEMA.test_error_msg($1, 'NULL/empty input table name');
$$ LANGUAGE SQL;
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.test_output_table(
stmt TEXT
)
RETURNS BOOLEAN AS $$
SELECT MADLIB_SCHEMA.test_error_msg($1, 'NULL/empty output table name');
$$ LANGUAGE SQL;
-- A few of the gucs like plan_cache_mode and dev_opt_unsafe_truncate_in_subtransaction
-- are only available in either > pg 11 or > gpdb 6.5. Using this function we
-- can make sure to run the guc assertion test (assert_guc_value) on the correct
-- platform versions.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.is_ver_greater_than_gp_640_or_pg_11()
RETURNS BOOLEAN AS $$
PythonFunctionBodyOnly(utilities, utilities, is_gp_version_less_than)
from utilities.utilities import __mad_version
from utilities.utilities import is_platform_pg
from utilities.utilities import is_pg_major_version_less_than
if is_platform_pg:
is_pg_major_less_than_12 = is_pg_major_version_less_than(None, 12)
return not is_pg_major_less_than_12
else:
is_ver_less_than_650 = __mad_version().is_gp_version_less_than('6.5.0')
return not is_ver_less_than_650
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.assert_guc_value(
guc_name TEXT,
expected_guc_value TEXT)
RETURNS VOID AS $$
import plpy
actual_guc_value = plpy.execute('show {0}'.format(guc_name))[0]
actual_guc_value = actual_guc_value[guc_name]
if actual_guc_value != expected_guc_value:
plpy.error('guc {0} assertion failed. Expected Value: {1}, '
'Actual Value: {2}'.format(guc_name, expected_guc_value, actual_guc_value))
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.is_table_unlogged(
tbl_name TEXT
)
RETURNS boolean AS $$
DECLARE
ret_val boolean;
BEGIN
BEGIN
SELECT relpersistence='u' INTO ret_val FROM pg_class WHERE relname = tbl_name;
EXCEPTION
WHEN OTHERS THEN
RETURN false; -- Traps exception and returns false, for GP versions < 6 as relpersistence is not a col in pg_class
END;
RETURN ret_val;
END;
$$ LANGUAGE plpgsql;