| /* ----------------------------------------------------------------------- *//** |
| * |
| * @file sample.sql_in |
| * |
| * @brief SQL functions for random sampling |
| * |
| * @sa For an overview of random-sampling functions, see the module |
| * description \ref grp_sample. |
| * |
| *//* ----------------------------------------------------------------------- */ |
| |
| m4_include(`SQLCommon.m4') |
| |
| /** |
| @addtogroup grp_sample |
| |
| <div class="toc"><b>Contents</b> |
| <ul> |
| <li><a href="#syntax">Functions</a></li> |
| <li><a href="#related">Related Topics</a></li> |
| </ul> |
| </div> |
| |
| @brief Provides utility functions for sampling operations. |
| |
| \warning <em> This MADlib method is still in early stage development. |
| Interface and implementation are subject to change. </em> |
| |
| The random sampling module consists of useful utility functions for sampling |
| operations. These functions can be used while implementing |
| new algorithms. |
| |
| @anchor syntax |
| @par Functions |
| |
| Sample a single row according to weights. |
| <pre class="syntax"> |
| weighted_sample( value, |
| weight |
| ) |
| </pre> |
| |
| \b Arguments |
| <dl class="arglist"> |
| <dt>value</dt> |
| <dd>BIGINT or FLOAT8[]. Value of row. Uniqueness is not enforced. If a value occurs multiple times, the probability of sampling this value is proportional to the sum of its weights. </dd> |
| <dt>weight</dt> |
| <dd>FLOAT8. Weight for row. A negative value here is treated as zero weight.</dd> |
| </dl> |
| |
| |
| |
| |
| |
| Refer to the file for documentation on each of the utility functions. |
| |
| @anchor related |
| @par Related Topics |
| |
| |
| @sa File sample.sql_in documenting the SQL functions. |
| */ |
| |
| -- Transition step of the weighted_sample aggregate over BIGINT values. |
| -- Takes the running aggregation state plus one (value, weight) row and |
| -- returns the updated state. The logic itself lives in the C library. |
| -- STRICT: the function is not invoked when any argument is NULL. |
| -- VOLATILE: presumably because the C code draws random numbers |
| -- (TODO confirm against the C implementation). |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weighted_sample_transition_int64( |
|     state   MADLIB_SCHEMA.bytea8, |
|     value   BIGINT, |
|     weight  DOUBLE PRECISION |
| ) |
| RETURNS MADLIB_SCHEMA.bytea8 |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C STRICT VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| |
| -- Merge step of the weighted_sample aggregate over BIGINT values. |
| -- Takes two aggregation states and returns a single state. It is wired in as |
| -- the aggregate prefunc only when the build does not target plain PostgreSQL. |
| -- STRICT: the function is not invoked when either state is NULL. |
| -- VOLATILE: presumably because choosing between the two partial samples is |
| -- randomized (TODO confirm against the C implementation). |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weighted_sample_merge_int64( |
|     state_left   MADLIB_SCHEMA.bytea8, |
|     state_right  MADLIB_SCHEMA.bytea8 |
| ) |
| RETURNS MADLIB_SCHEMA.bytea8 |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C STRICT VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| |
| -- Final step of the weighted_sample aggregate over BIGINT values. |
| -- Converts the accumulated aggregation state into the BIGINT result. |
| -- IMMUTABLE and STRICT: declared as depending only on the state argument and |
| -- not invoked for a NULL state. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weighted_sample_final_int64( |
|     state  MADLIB_SCHEMA.bytea8 |
| ) |
| RETURNS BIGINT |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C STRICT IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| |
| /** |
| * @brief Draw a single row at random, with probability proportional to its |
| * weight |
| * |
| * @param value Value of the row. Values need not be unique: when a value |
| * occurs several times, its probability of being sampled is proportional |
| * to the sum of the weights of all its occurrences. |
| * @param weight Weight of the row. A negative weight is treated as zero |
| * weight. |
| * @return The \c value of the selected row. Any particular row |
| * <tt>(value, weight)</tt> is selected with probability |
| * <tt>weight/SUM(weight)</tt>. |
| */ |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.weighted_sample( |
|     BIGINT, DOUBLE PRECISION); |
| CREATE AGGREGATE MADLIB_SCHEMA.weighted_sample( |
|     /*+ value */ BIGINT, |
|     /*+ weight */ DOUBLE PRECISION) ( |
| |
|     STYPE=MADLIB_SCHEMA.bytea8, |
|     INITCOND='', |
|     SFUNC=MADLIB_SCHEMA.weighted_sample_transition_int64, |
|     m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.weighted_sample_merge_int64,') |
|     FINALFUNC=MADLIB_SCHEMA.weighted_sample_final_int64 |
| ); |
| --------------------------------------------------------------------------- |
| |
| -- Transition step of the weighted_sample aggregate over DOUBLE PRECISION[] |
| -- values. Takes the running aggregation state plus one (value, weight) row |
| -- and returns the updated state. The logic itself lives in the C library. |
| -- STRICT: the function is not invoked when any argument is NULL. |
| -- VOLATILE: presumably because the C code draws random numbers |
| -- (TODO confirm against the C implementation). |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weighted_sample_transition_vector( |
|     state   MADLIB_SCHEMA.bytea8, |
|     value   DOUBLE PRECISION[], |
|     weight  DOUBLE PRECISION |
| ) |
| RETURNS MADLIB_SCHEMA.bytea8 |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C STRICT VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| |
| -- Merge step of the weighted_sample aggregate over DOUBLE PRECISION[] values. |
| -- Takes two aggregation states and returns a single state. It is wired in as |
| -- the aggregate prefunc only when the build does not target plain PostgreSQL. |
| -- STRICT: the function is not invoked when either state is NULL. |
| -- VOLATILE: presumably because choosing between the two partial samples is |
| -- randomized (TODO confirm against the C implementation). |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weighted_sample_merge_vector( |
|     state_left   MADLIB_SCHEMA.bytea8, |
|     state_right  MADLIB_SCHEMA.bytea8 |
| ) |
| RETURNS MADLIB_SCHEMA.bytea8 |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C STRICT VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| |
| -- Final step of the weighted_sample aggregate over DOUBLE PRECISION[] values. |
| -- Converts the accumulated aggregation state into the DOUBLE PRECISION[] |
| -- result. |
| -- IMMUTABLE and STRICT: declared as depending only on the state argument and |
| -- not invoked for a NULL state. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weighted_sample_final_vector( |
|     state  MADLIB_SCHEMA.bytea8 |
| ) |
| RETURNS DOUBLE PRECISION[] |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C STRICT IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| |
| /** |
| * @brief Draw a single row at random, with probability proportional to its |
| * weight (overload for array-valued rows) |
| * |
| * @param value Value of the row, given as a DOUBLE PRECISION array. |
| * @param weight Weight of the row. |
| * @return A DOUBLE PRECISION array produced from the final aggregation state; |
| * presumably the \c value of the selected row, as for the BIGINT |
| * overload (TODO confirm against the C implementation). |
| */ |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.weighted_sample( |
|     DOUBLE PRECISION[], DOUBLE PRECISION); |
| CREATE AGGREGATE MADLIB_SCHEMA.weighted_sample( |
|     /*+ value */ DOUBLE PRECISION[], |
|     /*+ weight */ DOUBLE PRECISION) ( |
| |
|     STYPE=MADLIB_SCHEMA.bytea8, |
|     INITCOND='', |
|     SFUNC=MADLIB_SCHEMA.weighted_sample_transition_vector, |
|     m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.weighted_sample_merge_vector,') |
|     FINALFUNC=MADLIB_SCHEMA.weighted_sample_final_vector |
| ); |
| --------------------------------------------------------------------------- |
| |
| -- C-implemented function taking a mean and returning an INTEGER. |
| -- Presumably a random draw from a Poisson distribution with that mean |
| -- (TODO confirm against the C implementation). |
| -- VOLATILE: repeated calls with the same argument are not assumed to return |
| -- the same result. STRICT: a NULL mean gives NULL without calling the C code. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.poisson_random( |
|     mean  DOUBLE PRECISION |
| ) |
| RETURNS INTEGER |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C STRICT VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| --------------------------------------------------------------------------- |
| |
| -- C-implemented function taking a parameter alpha and returning a |
| -- DOUBLE PRECISION. Presumably a random draw from a Gamma distribution with |
| -- alpha as its shape parameter (TODO confirm against the C implementation). |
| -- VOLATILE: repeated calls with the same argument are not assumed to return |
| -- the same result. STRICT: a NULL alpha gives NULL without calling the C code. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.gamma_random( |
|     alpha  DOUBLE PRECISION |
| ) |
| RETURNS DOUBLE PRECISION |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C STRICT VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| --------------------------------------------------------------------------- |
| |
| -- C-implemented function taking a DOUBLE PRECISION array and returning an |
| -- INTEGER. Presumably the array holds weights and the result is a randomly |
| -- drawn position in that array (TODO confirm against the C implementation). |
| -- |
| -- Declared VOLATILE, matching the other randomized functions in this file |
| -- (poisson_random, gamma_random). An IMMUTABLE declaration would allow the |
| -- planner to pre-evaluate or reuse a call with constant arguments, so |
| -- repeated calls could all return the same draw instead of fresh samples. |
| -- STRICT: a NULL array gives NULL without calling the C code. |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.index_weighted_sample( |
|     double precision[] |
| ) RETURNS integer |
| AS 'MODULE_PATHNAME' |
| LANGUAGE C VOLATILE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |