blob: af39c47172672d179f257251ad6ccc5d59b91391 [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* @file dt_utility.sql_in
*
* @brief Utility functions widely used in C4.5 and random forest.
* @date April 5, 2012
*
*//* ----------------------------------------------------------------------- */
/*
* @brief Cast any value to text.
*
* @param val A value with any specific type.
*
* @return The text format string for the value.
*
* @note Greenplum doesn't support boolean to text casting.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__to_char
(
val anyelement
)
RETURNS TEXT
AS 'MODULE_PATHNAME', 'dt_to_text'
LANGUAGE C STRICT IMMUTABLE;
/*
* @brief Cast regclass to text. we will not create a cast,
* since it may override the existing cast.
* Although there is no cast for regclass to text,
* PL/PGSQL can coerce it to text automatically.
* Another implementation can use sql function:
* select textin(regclassout('pg_class'::regclass));
*
* @param rc The regclass of the table.
*
* @return The text representation for the regclass.
*
*/
CREATE or replace FUNCTION MADLIB_SCHEMA.__regclass_to_text
(
rc regclass
)
RETURNS TEXT
AS $$
BEGIN
RETURN rc;
END
$$ LANGUAGE PLPGSQL IMMUTABLE;
/*
* @brief Format a string with the value in args array.
*
* @param fmt The format string.
* @param args The specified elements in format string.
*
* @return The formated string.
*
* @note Each '%' in fmt will be replaced with the corresponding value of args.
* The number of '%'s should equal to the length of array args.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__format
(
fmt TEXT,
args TEXT[]
)
RETURNS TEXT
AS 'MODULE_PATHNAME', 'dt_text_format'
LANGUAGE C IMMUTABLE;
/*
* @brief Short form to format a string with four parameters.
*
* @param arg1 The first argument.
* @param arg2 The second argument.
* @param arg3 The third argument.
* @param arg4 The fouth argument.
*
* @return The formated string.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__format
(
fmt TEXT,
arg1 TEXT,
arg2 TEXT,
arg3 TEXT,
arg4 TEXT
)
RETURNS TEXT AS $$
SELECT MADLIB_SCHEMA.__format($1, ARRAY[$2, $3, $4, $5]);
$$ LANGUAGE sql IMMUTABLE;
/*
* @brief Short form to format a string with three parameters.
*
* @param arg1 The first argument.
* @param arg2 The second argument.
* @param arg3 The third argument.
*
* @return The formated string.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__format
(
fmt TEXT,
arg1 TEXT,
arg2 TEXT,
arg3 TEXT
)
RETURNS TEXT AS $$
SELECT MADLIB_SCHEMA.__format($1, ARRAY[$2, $3, $4]);
$$ LANGUAGE sql IMMUTABLE;
/*
* @brief Short form to format a string with two parameters.
*
* @param arg1 The first argument.
* @param arg2 The second argument.
*
* @return The formated string.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__format
(
fmt TEXT,
arg1 TEXT,
arg2 TEXT
)
RETURNS TEXT AS $$
SELECT MADLIB_SCHEMA.__format($1, ARRAY[$2, $3]);
$$ LANGUAGE sql IMMUTABLE;
/*
* @brief Short form to format a string with a parameter.
*
* @param arg1 The first argument.
*
* @return The formated string.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__format
(
fmt TEXT,
arg1 TEXT
)
RETURNS TEXT AS $$
SELECT MADLIB_SCHEMA.__format($1, ARRAY[$2]);
$$ LANGUAGE sql IMMUTABLE;
/*
* @brief Raise exception if the condition is false.
*
* @param condition The assert condition.
* @param reason The reason string displayed when assert failure.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__assert
(
condition BOOLEAN,
reason TEXT
)
RETURNS void AS $$
BEGIN
IF (NOT condition) THEN
RAISE EXCEPTION 'ERROR: %', reason;
END IF;
END
$$ LANGUAGE PLPGSQL IMMUTABLE;
/*
* @brief Test if the specified table exists or not.
*
* @param full_table_name The full table name.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__table_exists
(
full_table_name TEXT
)
RETURNS BOOLEAN AS
'MODULE_PATHNAME', 'table_exists'
LANGUAGE C IMMUTABLE;
/*
* @brief Test if the specified column exists or not.
*
* @param full_table_name The full table name.
*
* @return True if the column exists, otherwise return false.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__column_exists
(
full_table_name TEXT,
column_name TEXT
)
RETURNS BOOLEAN AS $$
DECLARE
curstmt TEXT := '';
result INT := 0;
BEGIN
PERFORM MADLIB_SCHEMA.__assert
(
(full_table_name IS NOT NULL) AND (column_name IS NOT NULL),
'the table name and column name must not be null'
);
IF (MADLIB_SCHEMA.__table_exists(full_table_name)) THEN
SELECT MADLIB_SCHEMA.__format
(
'SELECT COUNT(*)
FROM pg_catalog.pg_attribute
WHERE attnum > 0 AND
(NOT attisdropped) AND
attname = ''%'' AND
attrelid = ''%''::regclass',
ARRAY[
column_name,
full_table_name
]
) INTO curstmt;
EXECUTE curstmt INTO result;
RETURN result >= 1;
END IF;
RETURN 'f';
END
$$ LANGUAGE PLPGSQL STABLE;
/*
* @brief Assert if the specified table exists or not.
*
* @param full_table_name The full table name.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__assert_table
(
full_table_name TEXT,
existence BOOLEAN
)
RETURNS void AS $$
DECLARE
err_msg TEXT;
BEGIN
IF (existence) THEN
err_msg = 'assertion failure. Table: ''' || full_table_name ||
''' does not exist';
ELSE
err_msg = 'assertion failure. Table: ''' || full_table_name ||
''' already exists';
END IF;
PERFORM MADLIB_SCHEMA.__assert
(
MADLIB_SCHEMA.__table_exists(full_table_name) = existence,
err_msg
);
END
$$ LANGUAGE PLPGSQL STABLE;
/*
* @brief Strip the schema name from the full table name.
*
* @param full_table_name The full table name.
*
* @return The table name without schema name.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__strip_schema_name
(
full_table_name TEXT
)
RETURNS TEXT AS $$
DECLARE
str_val TEXT;
BEGIN
PERFORM MADLIB_SCHEMA.__assert
(
full_table_name IS NOT NULL,
'table name should not be null'
);
str_val = trim(both ' ' FROM split_part(full_table_name, '.', 2));
IF (length(str_val) = 0) THEN
str_val = btrim(full_table_name, ' ');
END IF;
RETURN lower(str_val);
end
$$ LANGUAGE PLPGSQL IMMUTABLE;
/*
* @brief Get the schema name from a full table name.
* if there is no schema name in the full table name, then
* if the table exists, we return the schema name from catalog
* else the current schema name,
* else return the schema name from the full table name directly.
*
* @param full_table_name The full table name.
*
* @return The schema name of the table.
*
* @note This function should be VOLATILE, since we may get schema name
* from a new created table in the same transaction.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__get_schema_name
(
full_table_name TEXT
)
RETURNS TEXT AS $$
DECLARE
table_name TEXT;
temp TEXT[];
len INT;
curstmt TEXT;
schema_name TEXT;
BEGIN
PERFORM MADLIB_SCHEMA.__assert
(
full_table_name IS NOT NULL,
'table name should not be null'
);
temp = string_to_array(full_table_name, '.');
len = array_upper(temp, 1);
IF (1 = len) THEN
-- if table exists, return the schema name from catalog
IF (MADLIB_SCHEMA.__table_exists(full_table_name)) THEN
SELECT nspname
FROM pg_catalog.pg_namespace n
WHERE n.oid =
(
SELECT relnamespace FROM pg_catalog.pg_class
WHERE oid= full_table_name::regclass
)
INTO schema_name;
ELSE
-- get the current schema name
schema_name = current_schema();
END IF;
ELSE
PERFORM MADLIB_SCHEMA.__assert
(
len = 2,
'wrong full table name<' || full_table_name || '>'
);
-- get the shema name directly
schema_name = lower(btrim(temp[1], ' '));
END IF;
RETURN schema_name;
end
$$ LANGUAGE PLPGSQL;
/*
* @brief Test if the given element is in the specified array or not.
*
* @param find The element to be found.
* @param arr The array containing the elements.
*
* @return True the element is in the array. Otherwise returns false.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__array_search
(
find ANYELEMENT,
arr ANYARRAY
)
RETURNS BOOLEAN AS $$
SELECT count(*) = 1
FROM
(
SELECT unnest($2) as elem
) t
WHERE $1 IS NOT DISTINCT FROM elem
$$ LANGUAGE sql IMMUTABLE;
/*
* @brief Test if each element in the given array is a column of the table.
*
* @param column_names The array containing the columns to be tested.
* @param table_name The full table name.
*
* @return True if each element of column_names is a column of the table.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__columns_in_table
(
column_names TEXT[],
table_name TEXT
)
RETURNS BOOLEAN AS $$
SELECT count(*) = 0
FROM
(
SELECT unnest($1)
EXCEPT
SELECT quote_ident(attname)
FROM pg_attribute
WHERE attrelid = $2::regclass AND
attnum > 0 AND
NOT attisdropped
) t;
$$ LANGUAGE sql STABLE;
/*
* @brief Get the number of columns for a given table.
*
* @param table_name The full table name.
*
* @return The number of columns in the given table.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__num_of_columns
(
table_name TEXT
)
RETURNS INT AS $$
SELECT count(attname)::INT
FROM pg_attribute
WHERE attrelid = $1::regclass AND
attnum > 0 AND
NOT attisdropped
$$ LANGUAGE sql STABLE;
/*
* @brief Convert a string with delimiter ',' to an array.
* Each element in the array is trimed from the start
* and end using a space.
*
* @param csv_str The string with elements delimited by ','.
*
* @return The splitting string array.
*
* @note If the input string is NULL or an empty string
* after trimmed with ' ', then NULL will be returned.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__csvstr_to_array
(
csv_str TEXT
)
RETURNS TEXT[] AS $$
DECLARE
ret TEXT[];
str_val TEXT;
index INTEGER;
BEGIN
ret = string_to_array(lower(btrim(csv_str, ' ')), ',');
-- if the input array is NULL or an empty one,
-- then we return NULL directly
-- (array_upper will return non-NULL otherwise)
IF (array_upper(ret, 1) IS NULL) THEN
RETURN NULL;
END IF;
-- null or empty array will be filtered
FOR index IN 1..array_upper(ret, 1) LOOP
ret[index] = quote_ident(btrim(ret[index], ' '));
END LOOP;
RETURN ret;
END
$$ LANGUAGE PLPGSQL IMMUTABLE;
/*
* @brief Retrieve a BOOL array. The ith element in the array
* indicate whether arr2[i] is in arr1 or not. The size
* of the BOOL array is the same as arr2. For example,
* arr1 = ['aa', 'bb', 'dd'],
* arr2 = ['aa', 'dd', 'bb', 'ee', 'ccc']
* then the BOOL array is:
* ['t', 't', 't', 'f', 'f']
*
* @param src_arr The source array.
* @param tst_arr The array to be tested.
*
* @return A BOOL array.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__array_elem_in
(
src_arr ANYARRAY,
tst_arr ANYARRAY
)
RETURNS BOOLEAN[] AS $$
SELECT array_agg(elem in (SELECT unnest($1)))
FROM
(
SELECT unnest($2) as elem
) t
$$ LANGUAGE sql IMMUTABLE;
/*
* @brief Sort the given array.
*
* @param arr The array to be sorted.
*
* @return The sorted array.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__array_sort
(
arr ANYARRAY
)
RETURNS ANYARRAY AS $$
SELECT ARRAY
(SELECT elem FROM unnest($1) elem ORDER BY elem LIMIT ALL)
$$ LANGUAGE sql IMMUTABLE;