| /* ----------------------------------------------------------------------- *//** |
| * |
| * @file dt_utility.sql_in |
| * |
| * @brief Utility functions widely used in C4.5 and random forest. |
| * @date April 5, 2012 |
| * |
| *//* ----------------------------------------------------------------------- */ |
| |
| |
| /* |
| * @brief Cast any value to text. |
| * |
| * @param val A value with any specific type. |
| * |
| * @return The text format string for the value. |
| * |
| * @note Greenplum doesn't support boolean to text casting. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__to_char |
| ( |
| val anyelement |
| ) |
| RETURNS TEXT |
| AS 'MODULE_PATHNAME', 'dt_to_text' |
| LANGUAGE C STRICT IMMUTABLE; |
| |
| |
| /* |
| * @brief Cast regclass to text. we will not create a cast, |
| * since it may override the existing cast. |
| * Although there is no cast for regclass to text, |
| * PL/PGSQL can coerce it to text automatically. |
| * Another implementation can use sql function: |
| * select textin(regclassout('pg_class'::regclass)); |
| * |
| * @param rc The regclass of the table. |
| * |
| * @return The text representation for the regclass. |
| * |
| */ |
| CREATE or replace FUNCTION MADLIB_SCHEMA.__regclass_to_text |
| ( |
| rc regclass |
| ) |
| RETURNS TEXT |
| AS $$ |
| BEGIN |
| RETURN rc; |
| END |
| $$ LANGUAGE PLPGSQL IMMUTABLE; |
| |
| |
| /* |
| * @brief Format a string with the value in args array. |
| * |
| * @param fmt The format string. |
| * @param args The specified elements in format string. |
| * |
| * @return The formated string. |
| * |
| * @note Each '%' in fmt will be replaced with the corresponding value of args. |
| * The number of '%'s should equal to the length of array args. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__format |
| ( |
| fmt TEXT, |
| args TEXT[] |
| ) |
| RETURNS TEXT |
| AS 'MODULE_PATHNAME', 'dt_text_format' |
| LANGUAGE C IMMUTABLE; |
| |
| |
| /* |
| * @brief Short form to format a string with four parameters. |
| * |
| * @param arg1 The first argument. |
| * @param arg2 The second argument. |
| * @param arg3 The third argument. |
| * @param arg4 The fouth argument. |
| * |
| * @return The formated string. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__format |
| ( |
| fmt TEXT, |
| arg1 TEXT, |
| arg2 TEXT, |
| arg3 TEXT, |
| arg4 TEXT |
| ) |
| RETURNS TEXT AS $$ |
| SELECT MADLIB_SCHEMA.__format($1, ARRAY[$2, $3, $4, $5]); |
| $$ LANGUAGE sql IMMUTABLE; |
| |
| |
| /* |
| * @brief Short form to format a string with three parameters. |
| * |
| * @param arg1 The first argument. |
| * @param arg2 The second argument. |
| * @param arg3 The third argument. |
| * |
| * @return The formated string. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__format |
| ( |
| fmt TEXT, |
| arg1 TEXT, |
| arg2 TEXT, |
| arg3 TEXT |
| ) |
| RETURNS TEXT AS $$ |
| SELECT MADLIB_SCHEMA.__format($1, ARRAY[$2, $3, $4]); |
| $$ LANGUAGE sql IMMUTABLE; |
| |
| |
| /* |
| * @brief Short form to format a string with two parameters. |
| * |
| * @param arg1 The first argument. |
| * @param arg2 The second argument. |
| * |
| * @return The formated string. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__format |
| ( |
| fmt TEXT, |
| arg1 TEXT, |
| arg2 TEXT |
| ) |
| RETURNS TEXT AS $$ |
| SELECT MADLIB_SCHEMA.__format($1, ARRAY[$2, $3]); |
| $$ LANGUAGE sql IMMUTABLE; |
| |
| |
| /* |
| * @brief Short form to format a string with a parameter. |
| * |
| * @param arg1 The first argument. |
| * |
| * @return The formated string. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__format |
| ( |
| fmt TEXT, |
| arg1 TEXT |
| ) |
| RETURNS TEXT AS $$ |
| SELECT MADLIB_SCHEMA.__format($1, ARRAY[$2]); |
| $$ LANGUAGE sql IMMUTABLE; |
| |
| |
| /* |
| * @brief Raise exception if the condition is false. |
| * |
| * @param condition The assert condition. |
| * @param reason The reason string displayed when assert failure. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__assert |
| ( |
| condition BOOLEAN, |
| reason TEXT |
| ) |
| RETURNS void AS $$ |
| BEGIN |
| IF (NOT condition) THEN |
| RAISE EXCEPTION 'ERROR: %', reason; |
| END IF; |
| END |
| $$ LANGUAGE PLPGSQL IMMUTABLE; |
| |
| |
| /* |
| * @brief Test if the specified table exists or not. |
| * |
| * @param full_table_name The full table name. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__table_exists |
| ( |
| full_table_name TEXT |
| ) |
| RETURNS BOOLEAN AS |
| 'MODULE_PATHNAME', 'table_exists' |
| LANGUAGE C IMMUTABLE; |
| |
| |
| /* |
| * @brief Test if the specified column exists or not. |
| * |
| * @param full_table_name The full table name. |
| * |
| * @return True if the column exists, otherwise return false. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__column_exists |
| ( |
| full_table_name TEXT, |
| column_name TEXT |
| ) |
| RETURNS BOOLEAN AS $$ |
| DECLARE |
| curstmt TEXT := ''; |
| result INT := 0; |
| BEGIN |
| PERFORM MADLIB_SCHEMA.__assert |
| ( |
| (full_table_name IS NOT NULL) AND (column_name IS NOT NULL), |
| 'the table name and column name must not be null' |
| ); |
| |
| IF (MADLIB_SCHEMA.__table_exists(full_table_name)) THEN |
| SELECT MADLIB_SCHEMA.__format |
| ( |
| 'SELECT COUNT(*) |
| FROM pg_catalog.pg_attribute |
| WHERE attnum > 0 AND |
| (NOT attisdropped) AND |
| attname = ''%'' AND |
| attrelid = ''%''::regclass', |
| ARRAY[ |
| column_name, |
| full_table_name |
| ] |
| ) INTO curstmt; |
| |
| EXECUTE curstmt INTO result; |
| |
| RETURN result >= 1; |
| END IF; |
| |
| RETURN 'f'; |
| END |
| $$ LANGUAGE PLPGSQL STABLE; |
| |
| |
| /* |
| * @brief Assert if the specified table exists or not. |
| * |
| * @param full_table_name The full table name. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__assert_table |
| ( |
| full_table_name TEXT, |
| existence BOOLEAN |
| ) |
| RETURNS void AS $$ |
| DECLARE |
| err_msg TEXT; |
| BEGIN |
| IF (existence) THEN |
| err_msg = 'assertion failure. Table: ''' || full_table_name || |
| ''' does not exist'; |
| ELSE |
| err_msg = 'assertion failure. Table: ''' || full_table_name || |
| ''' already exists'; |
| END IF; |
| |
| PERFORM MADLIB_SCHEMA.__assert |
| ( |
| MADLIB_SCHEMA.__table_exists(full_table_name) = existence, |
| err_msg |
| ); |
| END |
| $$ LANGUAGE PLPGSQL STABLE; |
| |
| |
| /* |
| * @brief Strip the schema name from the full table name. |
| * |
| * @param full_table_name The full table name. |
| * |
| * @return The table name without schema name. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__strip_schema_name |
| ( |
| full_table_name TEXT |
| ) |
| RETURNS TEXT AS $$ |
| DECLARE |
| str_val TEXT; |
| BEGIN |
| PERFORM MADLIB_SCHEMA.__assert |
| ( |
| full_table_name IS NOT NULL, |
| 'table name should not be null' |
| ); |
| |
| |
| str_val = trim(both ' ' FROM split_part(full_table_name, '.', 2)); |
| |
| IF (length(str_val) = 0) THEN |
| str_val = btrim(full_table_name, ' '); |
| END IF; |
| |
| RETURN lower(str_val); |
| end |
| $$ LANGUAGE PLPGSQL IMMUTABLE; |
| |
| |
| /* |
| * @brief Get the schema name from a full table name. |
| * if there is no schema name in the full table name, then |
| * if the table exists, we return the schema name from catalog |
| * else the current schema name, |
| * else return the schema name from the full table name directly. |
| * |
| * @param full_table_name The full table name. |
| * |
| * @return The schema name of the table. |
| * |
| * @note This function should be VOLATILE, since we may get schema name |
| * from a new created table in the same transaction. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__get_schema_name |
| ( |
| full_table_name TEXT |
| ) |
| RETURNS TEXT AS $$ |
| DECLARE |
| table_name TEXT; |
| temp TEXT[]; |
| len INT; |
| curstmt TEXT; |
| schema_name TEXT; |
| BEGIN |
| PERFORM MADLIB_SCHEMA.__assert |
| ( |
| full_table_name IS NOT NULL, |
| 'table name should not be null' |
| ); |
| |
| temp = string_to_array(full_table_name, '.'); |
| len = array_upper(temp, 1); |
| |
| IF (1 = len) THEN |
| -- if table exists, return the schema name from catalog |
| IF (MADLIB_SCHEMA.__table_exists(full_table_name)) THEN |
| SELECT nspname |
| FROM pg_catalog.pg_namespace n |
| WHERE n.oid = |
| ( |
| SELECT relnamespace FROM pg_catalog.pg_class |
| WHERE oid= full_table_name::regclass |
| ) |
| |
| INTO schema_name; |
| ELSE |
| -- get the current schema name |
| schema_name = current_schema(); |
| END IF; |
| ELSE |
| PERFORM MADLIB_SCHEMA.__assert |
| ( |
| len = 2, |
| 'wrong full table name<' || full_table_name || '>' |
| ); |
| -- get the shema name directly |
| schema_name = lower(btrim(temp[1], ' ')); |
| END IF; |
| |
| RETURN schema_name; |
| end |
| $$ LANGUAGE PLPGSQL; |
| |
| |
| /* |
| * @brief Test if the given element is in the specified array or not. |
| * |
| * @param find The element to be found. |
| * @param arr The array containing the elements. |
| * |
| * @return True the element is in the array. Otherwise returns false. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__array_search |
| ( |
| find ANYELEMENT, |
| arr ANYARRAY |
| ) |
| RETURNS BOOLEAN AS $$ |
| SELECT count(*) = 1 |
| FROM |
| ( |
| SELECT unnest($2) as elem |
| ) t |
| WHERE $1 IS NOT DISTINCT FROM elem |
| $$ LANGUAGE sql IMMUTABLE; |
| |
| |
| /* |
| * @brief Test if each element in the given array is a column of the table. |
| * |
| * @param column_names The array containing the columns to be tested. |
| * @param table_name The full table name. |
| * |
| * @return True if each element of column_names is a column of the table. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__columns_in_table |
| ( |
| column_names TEXT[], |
| table_name TEXT |
| ) |
| RETURNS BOOLEAN AS $$ |
| SELECT count(*) = 0 |
| FROM |
| ( |
| SELECT unnest($1) |
| EXCEPT |
| SELECT quote_ident(attname) |
| FROM pg_attribute |
| WHERE attrelid = $2::regclass AND |
| attnum > 0 AND |
| NOT attisdropped |
| ) t; |
| $$ LANGUAGE sql STABLE; |
| |
| |
| /* |
| * @brief Get the number of columns for a given table. |
| * |
| * @param table_name The full table name. |
| * |
| * @return The number of columns in the given table. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__num_of_columns |
| ( |
| table_name TEXT |
| ) |
| RETURNS INT AS $$ |
| SELECT count(attname)::INT |
| FROM pg_attribute |
| WHERE attrelid = $1::regclass AND |
| attnum > 0 AND |
| NOT attisdropped |
| $$ LANGUAGE sql STABLE; |
| |
| |
| /* |
| * @brief Convert a string with delimiter ',' to an array. |
| * Each element in the array is trimed from the start |
| * and end using a space. |
| * |
| * @param csv_str The string with elements delimited by ','. |
| * |
| * @return The splitting string array. |
| * |
| * @note If the input string is NULL or an empty string |
| * after trimmed with ' ', then NULL will be returned. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__csvstr_to_array |
| ( |
| csv_str TEXT |
| ) |
| RETURNS TEXT[] AS $$ |
| DECLARE |
| ret TEXT[]; |
| str_val TEXT; |
| index INTEGER; |
| BEGIN |
| ret = string_to_array(lower(btrim(csv_str, ' ')), ','); |
| |
| -- if the input array is NULL or an empty one, |
| -- then we return NULL directly |
| -- (array_upper will return non-NULL otherwise) |
| IF (array_upper(ret, 1) IS NULL) THEN |
| RETURN NULL; |
| END IF; |
| |
| -- null or empty array will be filtered |
| FOR index IN 1..array_upper(ret, 1) LOOP |
| ret[index] = quote_ident(btrim(ret[index], ' ')); |
| END LOOP; |
| |
| RETURN ret; |
| END |
| $$ LANGUAGE PLPGSQL IMMUTABLE; |
| |
| |
| /* |
| * @brief Retrieve a BOOL array. The ith element in the array |
| * indicate whether arr2[i] is in arr1 or not. The size |
| * of the BOOL array is the same as arr2. For example, |
| * arr1 = ['aa', 'bb', 'dd'], |
| * arr2 = ['aa', 'dd', 'bb', 'ee', 'ccc'] |
| * then the BOOL array is: |
| * ['t', 't', 't', 'f', 'f'] |
| * |
| * @param src_arr The source array. |
| * @param tst_arr The array to be tested. |
| * |
| * @return A BOOL array. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__array_elem_in |
| ( |
| src_arr ANYARRAY, |
| tst_arr ANYARRAY |
| ) |
| RETURNS BOOLEAN[] AS $$ |
| SELECT array_agg(elem in (SELECT unnest($1))) |
| FROM |
| ( |
| SELECT unnest($2) as elem |
| ) t |
| $$ LANGUAGE sql IMMUTABLE; |
| |
| |
| /* |
| * @brief Sort the given array. |
| * |
| * @param arr The array to be sorted. |
| * |
| * @return The sorted array. |
| * |
| */ |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__array_sort |
| ( |
| arr ANYARRAY |
| ) |
| RETURNS ANYARRAY AS $$ |
| SELECT ARRAY |
| (SELECT elem FROM unnest($1) elem ORDER BY elem LIMIT ALL) |
| $$ LANGUAGE sql IMMUTABLE; |
| |