/* ----------------------------------------------------------------------- */
/**
 *
 * @file utilities.sql_in
 *
 * @brief SQL functions for carrying out routine tasks
 *
 * @sa For a brief overview of utility functions, see the
 *     module description \ref grp_utilities.
 *
**/
/* ----------------------------------------------------------------------- */

m4_include(`SQLCommon.m4')

/**
@addtogroup grp_utilities

<div class="toc"><b>Contents</b>
  <ul>
    <li><a href="#utilities">Utility Functions</a></li>
    <li><a href="#rel;ated">Related Topics</a></li>
  </ul>
</div>

@brief Provides a collection of user-defined functions for performing common tasks in the database.

Database functions are a collection of lower level utilities
to assist data scientists and others in using MADlib.

@anchor utilities
@par Utility Functions

<table class="output">

  <tr>
    <th>version()</th>
    <td>Return MADlib build information. </td>
  </tr>

  <tr>
    <th>assert()</th>
    <td>Raise an exception if the given condition is not satisfied.</td>
  </tr>

  <tr>
    <th>check_if_raises_error()</th>
    <td>Check if a SQL statement raises an error.</td>
  </tr>

  <tr>
    <th>check_if_col_exists()</th>
    <td>Check if a column exists in a table.</td>
  </tr>

  <tr>
    <th>isnan()</th>
    <td>Check if a floating-point number is NaN (not a number)</td>
  </tr>

  <tr>
    <th>create_schema_pg_temp()</th>
    <td>Create the temporary schema if it does not exist yet.</td>
  </tr>

  <tr>
    <th>noop()</th>
    <td>Create volatile noop function.</td>
  </tr>

  <tr>
    <th>cleanup_madlib_temp_tables()</th>
    <td>Drop all tables matching pattern '%madlib_temp%' in a given schema.</td>
  </tr>

  <tr>
    <th>dropcols()</th>
    <td>Create a new table with a subset of the columns dropped from a source table.
    </td>
  </tr>

</table>

Note: If the function cleanup_madlib_temp_tables() gives an Out-of-memory error,
then the number of tables to be dropped is too high to execute in one transaction.
In such a case, please follow the instructions provided with the error to execute
the command in multiple transactions.

@anchor related
@par Related Topics

File utilities.sql_in documenting the SQL functions.

*/

/**
 * @brief Drop all tables matching pattern '%madlib_temp%' in a given schema
 *
 * @param target_schema TEXT. The schema that takes affect.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cleanup_madlib_temp_tables(
    target_schema text
)
RETURNS void AS $$
PythonFunction(utilities, admin, cleanup_madlib_temp_tables)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cleanup_madlib_temp_tables_script(
    target_schema text
)
RETURNS text AS $$
PythonFunction(utilities, admin, cleanup_madlib_temp_tables_script)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.cleanup_madlib_temp_views(
    target_schema text
)
RETURNS void AS $$
PythonFunction(utilities, admin, cleanup_madlib_temp_views)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

/**
 * @brief Return MADlib build information.
 *
 * @returns Summary of MADlib build information, consisting of MADlib version,
 *     git revision, cmake configuration time, build type, build system,
 *     C compiler, and C++ compiler
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.version()
RETURNS TEXT
AS $$
    SELECT (
        'MADlib version: __MADLIB_VERSION__, '
        'git revision: __MADLIB_GIT_REVISION__, '
        'cmake configuration time: __MADLIB_BUILD_TIME__, '
        'build type: __MADLIB_BUILD_TYPE__, '
        'build system: __MADLIB_BUILD_SYSTEM__, '
        'C compiler: __MADLIB_C_COMPILER__, '
        'C++ compiler: __MADLIB_CXX_COMPILER__')::TEXT
$$
LANGUAGE sql
IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');

/**
 * @brief Raise an exception if the given condition is not satisfied.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.assert(condition BOOLEAN, msg VARCHAR)
RETURNS VOID
AS $$
BEGIN
    IF NOT condition THEN
        RAISE EXCEPTION 'Failed assertion: %', msg;
    END IF;
END
$$
LANGUAGE plpgsql
IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');


------------------------------------------------------------------------

/**
 * @brief Compute the relative error of an approximate value
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.relative_error(
    approx DOUBLE PRECISION,
    value DOUBLE PRECISION
) RETURNS DOUBLE PRECISION
AS $$
    SELECT abs(($1 - $2)/$2)
$$
LANGUAGE sql
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');

------------------------------------------------------------------------

/**
 * @brief Compute the relative error (w.r.t. the 2-norm) of an apprixmate vector
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.relative_error(
    approx DOUBLE PRECISION[],
    value DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION
LANGUAGE sql
AS $$
    SELECT MADLIB_SCHEMA.dist_norm2($1, $2) / MADLIB_SCHEMA.norm2($2)
$$
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');

------------------------------------------------------------------------

/**
 * @brief Check if a SQL statement raises an error
 *
 * @param sql The SQL statement
 * @returns \c TRUE if an exception is raised while executing \c sql, \c FALSE
 *     otherwise.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.check_if_raises_error(
    sql TEXT
) RETURNS BOOLEAN
AS $$
BEGIN
    EXECUTE sql;
    RETURN FALSE;
EXCEPTION
    WHEN OTHERS THEN
        RETURN TRUE;
END;
$$
LANGUAGE plpgsql
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
------------------------------------------------------------------------

/**
 * @brief Check if a column exists in a table
 *
 * @param source_table Source table
 * @param column_name  Column name in the table
 * @returns \c TRUE if it exists and FALSE if not
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.check_if_col_exists(
    source_table TEXT,
    column_name TEXT
)
RETURNS BOOLEAN AS $$
DECLARE
  sql                           TEXT;
  input_table_name              VARCHAR[];
  actual_table_name             VARCHAR;
  schema_name                   VARCHAR;
BEGIN

  input_table_name = regexp_split_to_array(source_table, E'\\.');
  IF array_upper(input_table_name, 1) = 1 THEN
    actual_table_name = input_table_name[1];
    schema_name  := current_schema();
  ELSIF array_upper(input_table_name, 1) = 2 THEN
    actual_table_name = input_table_name[2];
    schema_name  = input_table_name[1];
  ELSE
    RAISE EXCEPTION 'Incorrect input source table name provided';
  END IF;
  sql := 'SELECT MADLIB_SCHEMA.assert(count( column_name )>0, ''Error'') FROM information_schema.columns WHERE table_schema = ''' || schema_name || ''' AND table_name = ''' || actual_table_name || ''' AND column_name= ''' || column_name || '''';
  raise notice '%', sql;

  RETURN NOT MADLIB_SCHEMA.check_if_raises_error(sql);
END;
$$ LANGUAGE plpgsql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `READS SQL DATA', `');
------------------------------------------------------------------------

/**
 * @brief Check if a floating-point number is NaN (not a number)
 *
 * This function exists for portability. Some DBMSs like PostgreSQL treat
 * floating-point numbers as fully ordered -- contrary to IEEE 754. (See, e.g.,
 * the <a href=
 * "http://www.postgresql.org/docs/current/static/datatype-numeric.html#DATATYPE-FLOAT"
 * >PostgreSQL documentation</a>. For portability, MADlib code should not make
 * use of such "features" directly, but only use isnan() instead.
 *
 * @param number
 * @returns \c TRUE if \c number is \c NaN, \c FALSE otherwise
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.isnan(
    number DOUBLE PRECISION
) RETURNS BOOLEAN
AS $$
    SELECT $1 = 'NaN'::DOUBLE PRECISION;
$$
LANGUAGE sql
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');

------------------------------------------------------------------------

/**
 * @brief Create the temporary schema if it does not exist yet
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.create_schema_pg_temp()
RETURNS VOID
LANGUAGE plpgsql
VOLATILE
AS $$
BEGIN
    -- pg_my_temp_schema() is a built-in function
    IF pg_my_temp_schema() = 0 THEN
        -- The pg_temp schema does not exist, yet. Creating a temporary table
        -- will create it. Note: There is *no* race condition here, because
        -- every session has its own temp schema.
        EXECUTE 'CREATE TEMPORARY TABLE _madlib_temp_table AS SELECT 1;
            DROP TABLE pg_temp._madlib_temp_table CASCADE;';
    END IF;
END;
$$
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

------------------------------------------------------------------------

/**
 * @brief Create volatile noop function
 *
 * The only use of this function is as an optimization fence when used in the
 * SELECT list of a query. See, e.g.,
 * http://archives.postgresql.org/pgsql-sql/2012-07/msg00030.php
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.noop()
RETURNS VOID
AS 'MODULE_PATHNAME'
LANGUAGE c
VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');

------------------------------------------------------------------------

/*
 * Create type bytea8 with 8-byte alignment.
 */
m4_ifdef(`__UDT_NOT_ALLOWED__', `', `
CREATE TYPE MADLIB_SCHEMA.bytea8;
')

CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.bytea8in(cstring)
RETURNS MADLIB_SCHEMA.bytea8 AS 'byteain'
LANGUAGE internal IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');

CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.bytea8out(MADLIB_SCHEMA.bytea8)
RETURNS cstring AS 'byteaout'
LANGUAGE internal IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');


CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.bytea8recv(internal)
RETURNS MADLIB_SCHEMA.bytea8 AS 'bytearecv'
LANGUAGE internal IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');


CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.bytea8send(MADLIB_SCHEMA.bytea8)
RETURNS bytea AS 'byteasend'
LANGUAGE internal IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');


m4_ifdef(`__UDT_NOT_ALLOWED__', `', `
CREATE TYPE MADLIB_SCHEMA.bytea8(
    INPUT = MADLIB_SCHEMA.bytea8in,
    OUTPUT = MADLIB_SCHEMA.bytea8out,
    RECEIVE = MADLIB_SCHEMA.bytea8recv,
    SEND = MADLIB_SCHEMA.bytea8send,
    ALIGNMENT = double,
    STORAGE = external
);
')

/**
 * @brief Get all column names except dependent variable
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__internal_get_col_names_except_dep_variable(
     source_table         VARCHAR       -- name of input  table
   , dependent_varname    VARCHAR       -- name of dependent variable
)
RETURNS VARCHAR AS $$
DECLARE
col_names VARCHAR[];
BEGIN
  EXECUTE 'SELECT ARRAY(SELECT DISTINCT column_name::varchar from ' ||
          ' information_schema.columns WHERE ' ||
          'column_name NOT LIKE ''' || dependent_varname || '''' ||
          'AND table_name LIKE ''' || source_table || ''')'
  INTO col_names;
  RETURN 'ARRAY[' || array_to_string(col_names, ',') || ']';
END;
$$ LANGUAGE plpgsql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `READS SQL DATA', `');

------------------------------------------------------------------------
/**
 * @brief Generate random remporary names for temp table and other names
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__unique_string ()
RETURNS VARCHAR AS $$
PythonFunction(utilities, utilities, unique_string)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');


------------------------------------------------------------------------
/**
 * @brief Takes a string of comma separated values and puts it into an array
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA._string_to_array (
  s VARCHAR
)
RETURNS VARCHAR[] AS $$
PythonFunction(utilities, utilities, _string_to_sql_array)
$$ LANGUAGE plpythonu IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');

------------------------------------------------------------------------

/**
 * @brief Cast boolean into text
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.bool_to_text (BOOLEAN)
RETURNS TEXT
STRICT
LANGUAGE SQL AS $$
    SELECT CASE
        WHEN $1
        THEN 'true'
        ELSE 'false'
    END;
$$m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');

------------------------------------------------------------------------

/**
 * @brief Cast any value to text.
 *
 * @param val   A value with any specific type.
 *
 * @return The text format string for the value.
 *
 * @note Greenplum doesn't support bit/boolean to text casting.
 *
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__to_char(val anyelement)
RETURNS TEXT
AS 'MODULE_PATHNAME', '__to_text'
LANGUAGE C STRICT IMMUTABLE;

------------------------------------------------------------------------

/*
 * An array_agg() function is defined in module array_ops (to compatibility with
 * GP 4.0.
 */

------------Added for the in-memory group iteration controller----------
DROP TYPE IF EXISTS MADLIB_SCHEMA._grp_state_type CASCADE;
CREATE TYPE MADLIB_SCHEMA._grp_state_type AS(
    grp_key     TEXT,
    iteration   INTEGER,
    state       DOUBLE PRECISION[]
);

CREATE OR REPLACE FUNCTION MADLIB_SCHEMA._gen_state(
    grp_key     TEXT[],
    iteration   INTEGER[],
    state       DOUBLE PRECISION[])
RETURNS SETOF MADLIB_SCHEMA._grp_state_type AS $$
    num_grp = len(grp_key)
    if num_grp == 0:
        return
    num_var = len(state) / num_grp
    for i in range(num_grp):
        if iteration is None:
            yield (grp_key[i], None, state[i * num_var : (i + 1) * num_var] if len(state) > 0 else None)
        else:
            yield (grp_key[i], iteration[i], state[i * num_var : (i + 1) * num_var] if len(state) > 0 else None)
$$ LANGUAGE plpythonu
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
------------------------------------------------------------

CREATE OR REPLACE FUNCTION MADLIB_SCHEMA._final_mode(double precision[])
  RETURNS double precision AS
$BODY$
    SELECT a
    FROM unnest($1) a
    GROUP BY 1
    ORDER BY COUNT(1) DESC, 1
    LIMIT 1;
$BODY$
LANGUAGE 'sql' IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');

-- Tell Postgres how to use our aggregate
DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.mode(double precision) CASCADE;
CREATE AGGREGATE MADLIB_SCHEMA.mode(double precision) (
  SFUNC=array_append, --Function to call for each row. Just builds the array
  STYPE=double precision[],
  FINALFUNC=MADLIB_SCHEMA._final_mode, --Function to call after everything has been added to array
  INITCOND='{}' --Initialize an empty array when starting
);


------------------------------------------------------------------------
/**
 * @brief Creates a new table with a subset of columns dropped from another
 *  source table.
 *
 * @param source_table Name of the table containing the source data.
 * @param out_table Name of the generated table containing the output.
            If a table with the same name already exists, an error will be returned.
 * @param cols_to_drop Comma-separated string of column names from the source
 *          table to drop. An error is returned if the output table does not
 *          contain any columns or if a requested column is not present in the
 *          source table.
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.dropcols (
  source_table      VARCHAR,
  out_table         VARCHAR,
  cols_to_drop      VARCHAR
)
RETURNS void AS $$
PythonFunctionBodyOnly(utilities, utilities)
    from utilities import control
    with control.MinWarning('error'):
        return utilities.create_table_drop_cols(source_table, out_table, cols_to_drop)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');



CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.is_pg_major_version_less_than(
  compare_version INTEGER)
RETURNS BOOLEAN AS $$
PythonFunction(utilities, utilities, is_pg_major_version_less_than)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');

-- Function to trap errors while running sql in plpy
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.trap_error(
  stmt TEXT
)
RETURNS INTEGER AS $$
BEGIN
    BEGIN
        EXECUTE stmt;
    EXCEPTION
        WHEN OTHERS THEN
            RETURN 1;
    END;
    RETURN 0;
END;
$$ LANGUAGE plpgsql;

-- A few of the gucs like plan_cache_mode and dev_opt_unsafe_truncate_in_subtransaction
-- are only available in either > pg 11 or > gpdb 6.5. Using this function we
-- can make sure to run the guc assertion test (assert_guc_value) on the correct
-- platform versions.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.is_ver_greater_than_gp_640_or_pg_11()
RETURNS BOOLEAN AS $$
PythonFunctionBodyOnly(utilities, utilities, is_gp_version_less_than)
    from utilities.utilities import __mad_version
    from utilities.utilities import is_platform_pg
    from utilities.utilities import is_pg_major_version_less_than
    if is_platform_pg:
        is_pg_major_less_than_12 = is_pg_major_version_less_than(None, 12)
        return not is_pg_major_less_than_12
    else:
        is_ver_less_than_650 = __mad_version().is_gp_version_less_than('6.5.0')
        return not is_ver_less_than_650
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');


CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.assert_guc_value(
 guc_name           TEXT,
 expected_guc_value TEXT)
RETURNS VOID AS $$
import plpy
actual_guc_value = plpy.execute('show {0}'.format(guc_name))[0]
actual_guc_value = actual_guc_value[guc_name]
if actual_guc_value != expected_guc_value:
    plpy.error('guc {0} assertion failed. Expected Value: {1}, '
               'Actual Value: {2}'.format(guc_name, expected_guc_value, actual_guc_value))
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');

CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.is_table_unlogged(
  tbl_name TEXT
)
RETURNS boolean AS $$
DECLARE
ret_val   boolean;
BEGIN
    BEGIN
        SELECT relpersistence='u' INTO ret_val FROM pg_class WHERE relname = tbl_name;
    EXCEPTION
        WHEN OTHERS THEN
            RETURN false; -- Traps exception and returns false, for GP versions < 6 as relpersistence is not a col in pg_class
    END;
    RETURN ret_val;
END;
$$ LANGUAGE plpgsql;
