blob: 93a7779c5686b7244da63bf63e0a19d4959c8e36 [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* @file quantile.sql_in
*
* @brief SQL function for Quantile
* @date January 2011
*
* @sa For a brief introduction to quantiles, see the module
* description \ref grp_quantile.
*
*//* ----------------------------------------------------------------------- */
/**
@addtogroup grp_quantile
@about
This function computes the specified quantile value. It reads the name of the table, the specific column, and
computes the quantile value based on the fraction specified as the third argument.
For a different implementation of quantile check out the cmsketch_centile()
aggregate in the \ref grp_countmin module.
@prereq
None
@usage
Function: <tt>quantile( '<em>table_name</em>', '<em>col_name</em>',
<em>quantile</em>)</tt>
@examp
-# Prepare some input:\n
<tt>CREATE TABLE tab1 AS SELECT generate_series( 1,1000) as col1;</tt>
-# Run the quantile() function:\n
<tt>SELECT madlib.quantile( 'tab1', 'col1', .3);</tt>
@sa file quantile.sql_in (documenting the SQL function),
module grp_countmin (for an approximate quantile implementation)
*/
/**
* @brief Compute a quantile
*
* @param table_name name of the table from which quantile is to be taken
* @param col_name name of the column that is to be used for quantile calculation
* @param quantile desired quantile value \f$ \in (0,1) \f$
* @returns The quantile value
*
* This function computes the specified quantile value. It reads the name of the
* table, the specific column, and computes the quantile value based on the
* fraction specified as the third argument.
*/
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.quantile( table_name TEXT, col_name TEXT, quantile FLOAT) RETURNS FLOAT AS $$
declare
size FLOAT[];
curr FLOAT;
Begin
EXECUTE 'SELECT ARRAY[COUNT(*), MIN( ' || col_name || ' ), MAX(' || col_name || ' ), AVG( ' || col_name || ' )] FROM ' || table_name || ';' INTO size;
size[1] = size[1]*quantile;
LOOP
EXECUTE 'SELECT COUNT(*) FROM '|| table_name || ' WHERE ' || col_name || ' <= ' || size[4] || ';' INTO curr;
-- RAISE INFO 'VALUES NOW: % WANT: % FRACT: % MIN: % MAX: % CURR: %', curr, size[1], curr/size[1] + size[1]/curr, size[2], size[3], size[4];
IF((curr - size[1]) > 1) THEN
size[3] = size[4];
size[4] = (size[2]+size[4])/2.0;
ELSIF((size[1] - curr) > 1) THEN
size[2] = size[4];
size[4] = (size[3]+size[4])/2.0;
ELSE
RETURN size[4];
END IF;
END LOOP;
RETURN size[4];
end
$$ LANGUAGE plpgsql;