blob: 8f94bcb4ef7b0d49b2158846111884e03fc84db7 [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* @file dt_utility.sql_in
*
* @brief Utility functions widely used in C4.5 and random forest.
* @date April 5, 2012
*
*//* ----------------------------------------------------------------------- */
/*
* @brief Cast any value to text.
*
* @param val A value with any specific type.
*
* @return The text format string for the value.
*
* @note Greenplum doesn't support boolean to text casting.
*
*/
DROP FUNCTION IF EXISTS MADLIB_SCHEMA.__to_char
(
val anyelement
);
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__to_char
(
val anyelement
)
RETURNS TEXT AS $$
DECLARE
is_bool BOOLEAN;
BEGIN
IF (val is NULL) THEN
return 'null'::text;
END IF;
SELECT pg_typeof(val) = 'boolean'::regtype INTO is_bool;
IF (is_bool) THEN
IF (val::boolean) THEN
RETURN 'true';
ELSE
RETURN 'false';
END IF;
END IF;
RETURN val::TEXT;
END
$$ LANGUAGE PLPGSQL;
/*
* @brief Cast regclass to text. we will not create a cast,
* since it may override the existing cast.
* Although there is no cast for regclass to text,
* PL/PGSQL can coerce it to text automatically.
* Another implementation can use sql function:
* select textin(regclassout('pg_class'::regclass));
*
* @param rc The regclass of the table.
*
* @return The text representation for the regclass.
*
*/
CREATE or replace FUNCTION MADLIB_SCHEMA.__regclass_to_text
(
rc regclass
)
RETURNS TEXT
AS $$
BEGIN
RETURN rc;
END
$$ LANGUAGE PLPGSQL;
/*
* @brief Format a string with the value in args array.
*
* @param fmt The format string.
* @param args The specified elements in format string.
*
* @return The formated string.
*
* @note Each '%' in fmt will be replaced with the corresponding value of args.
* The number of '%'s should equal to the length of array args.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__format
(
fmt TEXT,
args TEXT[]
)
RETURNS TEXT
AS 'MODULE_PATHNAME', 'dt_text_format'
LANGUAGE C IMMUTABLE;
/*
* @brief Short form to format a string with four parameters.
*
* @param arg1 The first argument.
* @param arg2 The second argument.
* @param arg3 The third argument.
* @param arg4 The fouth argument.
*
* @return The formated string.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__format
(
fmt TEXT,
arg1 TEXT,
arg2 TEXT,
arg3 TEXT,
arg4 TEXT
)
RETURNS TEXT AS $$
DECLARE
stmt TEXT := '';
BEGIN
SELECT MADLIB_SCHEMA.__format(fmt, ARRAY[arg1, arg2, arg3, arg4]) INTO stmt;
RETURN stmt;
END
$$ LANGUAGE PLPGSQL;
/*
* @brief Short form to format a string with three parameters.
*
* @param arg1 The first argument.
* @param arg2 The second argument.
* @param arg3 The third argument.
*
* @return The formated string.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__format
(
fmt TEXT,
arg1 TEXT,
arg2 TEXT,
arg3 TEXT
)
RETURNS TEXT AS $$
DECLARE
stmt TEXT := '';
BEGIN
SELECT MADLIB_SCHEMA.__format(fmt, ARRAY[arg1, arg2, arg3]) INTO stmt;
RETURN stmt;
END
$$ LANGUAGE PLPGSQL;
/*
* @brief Short form to format a string with two parameters.
*
* @param arg1 The first argument.
* @param arg2 The second argument.
*
* @return The formated string.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__format
(
fmt TEXT,
arg1 TEXT,
arg2 TEXT
)
RETURNS TEXT AS $$
DECLARE
stmt TEXT := '';
BEGIN
SELECT MADLIB_SCHEMA.__format(fmt, ARRAY[arg1, arg2]) INTO stmt;
RETURN stmt;
END
$$ LANGUAGE PLPGSQL;
/*
* @brief Short form to format a string with a parameter.
*
* @param arg1 The first argument.
*
* @return The formated string.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__format
(
fmt TEXT,
arg1 TEXT
)
RETURNS TEXT AS $$
DECLARE
stmt TEXT := '';
BEGIN
SELECT MADLIB_SCHEMA.__format(fmt, ARRAY[arg1]) INTO stmt;
RETURN stmt;
END
$$ LANGUAGE PLPGSQL;
/*
* @brief Raise exception if the condition is false.
*
* @param condition The assert condition.
* @param reason The reason string displayed when assert failure.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__assert
(
condition BOOLEAN,
reason TEXT
)
RETURNS void AS $$
BEGIN
IF (NOT condition) THEN
RAISE EXCEPTION 'ERROR: %', reason;
END IF;
END
$$ LANGUAGE PLPGSQL;
/*
* @brief Test if the specified table exists or not.
*
* @param full_table_name The full table name.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__table_exists
(
full_table_name TEXT
)
RETURNS BOOLEAN AS
'MODULE_PATHNAME', 'table_exists'
LANGUAGE C IMMUTABLE;
/*
* @brief Test if the specified column exists or not.
*
* @param full_table_name The full table name.
*
* @return True if the column exists, otherwise return false.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__column_exists
(
full_table_name TEXT,
column_name TEXT
)
RETURNS BOOLEAN AS $$
DECLARE
curstmt TEXT := '';
result INT := 0;
BEGIN
PERFORM MADLIB_SCHEMA.__assert
(
(full_table_name IS NOT NULL) AND (column_name IS NOT NULL),
'the table name and column name must not be null'
);
IF (MADLIB_SCHEMA.__table_exists(full_table_name)) THEN
SELECT MADLIB_SCHEMA.__format
(
'SELECT COUNT(*)
FROM pg_catalog.pg_attribute
WHERE attnum > 0 AND
(NOT attisdropped) AND
attname = ''%'' AND
attrelid = ''%''::regclass',
ARRAY[
column_name,
full_table_name
]
) INTO curstmt;
EXECUTE curstmt INTO result;
RETURN result >= 1;
END IF;
RETURN 'f';
END
$$ LANGUAGE PLPGSQL;
/*
* @brief Assert if the specified table exists or not.
*
* @param full_table_name The full table name.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__assert_table
(
full_table_name TEXT,
existence BOOLEAN
)
RETURNS void AS $$
DECLARE
err_msg TEXT;
BEGIN
IF (existence) THEN
err_msg = 'assertion failure. Table: ''' || full_table_name ||
''' does not exist';
ELSE
err_msg = 'assertion failure. Table: ''' || full_table_name ||
''' already exists';
END IF;
PERFORM MADLIB_SCHEMA.__assert
(
MADLIB_SCHEMA.__table_exists(full_table_name) = existence,
err_msg
);
END
$$ LANGUAGE PLPGSQL;
/*
* @brief Strip the schema name from the full table name.
*
* @param full_table_name The full table name.
*
* @return The table name without schema name.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__strip_schema_name
(
full_table_name TEXT
)
RETURNS TEXT AS $$
DECLARE
str_val TEXT;
BEGIN
PERFORM MADLIB_SCHEMA.__assert
(
full_table_name IS NOT NULL,
'table name should not be null'
);
str_val = trim(both ' ' FROM split_part(full_table_name, '.', 2));
IF (length(str_val) = 0) THEN
str_val = btrim(full_table_name, ' ');
END IF;
RETURN lower(str_val);
end
$$ LANGUAGE PLPGSQL;
/*
* @brief Get the schema name from a full table name.
* if there is no schema name in the full table name, then
* if the table exists, we return the schema name from catalog
* else the current schema name,
* else return the schema name from the full table name directly.
*
* @param full_table_name The full table name.
*
* @return The schema name of the table.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__get_schema_name
(
full_table_name TEXT
)
RETURNS TEXT AS $$
DECLARE
table_name TEXT;
temp TEXT[];
len INT;
curstmt TEXT;
schema_name TEXT;
BEGIN
PERFORM MADLIB_SCHEMA.__assert
(
full_table_name IS NOT NULL,
'table name should not be null'
);
temp = string_to_array(full_table_name, '.');
len = array_upper(temp, 1);
IF (1 = len) THEN
-- if table exists, return the schema name from catalog
IF (MADLIB_SCHEMA.__table_exists(full_table_name)) THEN
SELECT nspname
FROM pg_catalog.pg_namespace n
WHERE n.oid =
(
SELECT relnamespace FROM pg_catalog.pg_class
WHERE oid= full_table_name::regclass
)
INTO schema_name;
ELSE
-- get the current schema name
schema_name = current_schema();
END IF;
ELSE
PERFORM MADLIB_SCHEMA.__assert
(
len = 2,
'wrong full table name<' || full_table_name || '>'
);
-- get the shema name directly
schema_name = lower(btrim(temp[1], ' '));
END IF;
RETURN schema_name;
end
$$ LANGUAGE PLPGSQL;
/*
* @brief Test if the given element is in the specified array or not.
*
* @param find The element to be found.
* @param arr The array containing the elements.
*
* @return True the element is in the array. Otherwise returns false.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__array_search
(
find ANYELEMENT,
arr ANYARRAY
)
RETURNS BOOLEAN AS $$
SELECT count(*) = 1
FROM
(
SELECT unnest($2) as elem
) t
WHERE $1 IS NOT DISTINCT FROM elem
$$ LANGUAGE sql STABLE;
/*
* @brief Test if each element in the given array is a column of the table.
*
* @param column_names The array containing the columns to be tested.
* @param table_name The full table name.
*
* @return True if each element of column_names is a column of the table.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__columns_in_table
(
column_names TEXT[],
table_name TEXT
)
RETURNS BOOLEAN AS $$
SELECT count(*) = 0
FROM
(
SELECT unnest($1)
EXCEPT
SELECT attname
FROM pg_attribute
WHERE attrelid = $2::regclass AND
attnum > 0 AND
NOT attisdropped
) t;
$$ LANGUAGE sql STABLE;
/*
* @brief Get the number of columns for a given table.
*
* @param table_name The full table name.
*
* @return The number of columns in the given table.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__num_of_columns
(
table_name TEXT
)
RETURNS INT AS $$
SELECT count(attname)::INT
FROM pg_attribute
WHERE attrelid = $1::regclass AND
attnum > 0 AND
NOT attisdropped
$$ LANGUAGE sql STABLE;
/*
* @brief Convert a string with delimiter ',' to an array.
* Each element in the array is trimed from the start
* and end using a space.
*
* @param csv_str The string with elements delimited by ','.
*
* @return The splitting string array.
*
* @note If the input string is NULL or an empty string
* after trimmed with ' ', then NULL will be returned.
*
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__csvstr_to_array
(
csv_str TEXT
)
RETURNS TEXT[] AS $$
DECLARE
ret TEXT[];
str_val TEXT;
index INTEGER;
BEGIN
ret = string_to_array(lower(btrim(csv_str, ' ')), ',');
-- if the input array is NULL or an empty one,
-- then we return NULL directly
-- (array_upper will return non-NULL otherwise)
IF (array_upper(ret, 1) IS NULL) THEN
RETURN NULL;
END IF;
-- null or empty array will be filtered
FOR index IN 1..array_upper(ret, 1) LOOP
ret[index] = btrim(ret[index], ' ');
END LOOP;
RETURN ret;
END
$$ LANGUAGE PLPGSQL;