blob: 8f8a56f6f11e8630ece92b044a47a1a594d0f29e [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* @file sample.sql_in
*
* @brief SQL functions for random sampling
*
* @sa For an overview of random-sampling functions, see the module
* description \ref grp_sample.
*
*//* ----------------------------------------------------------------------- */
m4_include(`SQLCommon.m4')
/**
@addtogroup grp_sample
<div class="toc"><b>Contents</b>
<ul>
<li><a href="#syntax">Functions</a></li>
<li><a href="#related">Related Topics</a></li>
</ul>
</div>
@brief Provides utility functions for sampling operations.
\warning <em> This MADlib method is still in early stage development.
Interface and implementation are subject to change. </em>
The random sampling module consists of useful utility functions for sampling
operations. These functions can be used while implementing
new algorithms.
@anchor syntax
@par Functions
Sample a single row according to weights.
<pre class="syntax">
weighted_sample( value,
weight
)
</pre>
\b Arguments
<dl class="arglist">
<dt>value</dt>
<dd>BIGINT or FLOAT8[]. Value of row. Uniqueness is not enforced. If a value occurs multiple times, the probability of sampling this value is proportional to the sum of its weights. </dd>
<dt>weight</dt>
<dd>FLOAT8. Weight for row. A negative value here is treated as zero weight. </dd>
</dl>
Refer to the file for documentation on each of the utility functions.
@anchor related
@par Related Topics
@sa File sample.sql_in documenting the SQL functions.
*/
/**
 * @brief Transition function of the weighted_sample aggregate (BIGINT values)
 *
 * Registered as the SFUNC of weighted_sample(BIGINT, DOUBLE PRECISION).
 * The body lives in the C shared library. Because the function is STRICT,
 * the aggregate skips rows in which value or weight is NULL.
 *
 * @param state Running aggregate state
 * @param value Value of the current row
 * @param weight Weight of the current row
 * @return Updated aggregate state
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weighted_sample_transition_int64(
    state   MADLIB_SCHEMA.bytea8,
    value   BIGINT,
    weight  DOUBLE PRECISION
)
RETURNS MADLIB_SCHEMA.bytea8
AS 'MODULE_PATHNAME'
LANGUAGE C VOLATILE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
/**
 * @brief Merge function of the weighted_sample aggregate (BIGINT values)
 *
 * Takes two aggregate states and returns a single state. It is attached to
 * the aggregate as its prefunc on builds other than plain PostgreSQL.
 * The body lives in the C shared library.
 *
 * @param state_left First aggregate state
 * @param state_right Second aggregate state
 * @return Combined aggregate state
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weighted_sample_merge_int64(
    state_left   MADLIB_SCHEMA.bytea8,
    state_right  MADLIB_SCHEMA.bytea8
)
RETURNS MADLIB_SCHEMA.bytea8
AS 'MODULE_PATHNAME'
LANGUAGE C VOLATILE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
/**
 * @brief Final function of the weighted_sample aggregate (BIGINT values)
 *
 * Registered as the FINALFUNC of weighted_sample(BIGINT, DOUBLE PRECISION):
 * it turns the accumulated state into the BIGINT result of the aggregate.
 * The body lives in the C shared library.
 *
 * @param state Aggregate state after all rows have been processed
 * @return The BIGINT result of the aggregate
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weighted_sample_final_int64(
    state  MADLIB_SCHEMA.bytea8
)
RETURNS BIGINT
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
/**
* @brief Sample a single row according to weights
*
* @param value Value of row. Uniqueness is not enforced. If a value occurs
* multiple times, the probability of sampling this value is proportional to
* the sum of its weights.
* @param weight Weight for row. A negative value here is treated as zero
* weight.
* @return \c value of the selected row. The probability of sampling any
* particular row <tt>(value, weight)</tt> is
* <tt>weight/SUM(weight)</tt>.
*/
DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.weighted_sample(BIGINT, DOUBLE PRECISION);
CREATE AGGREGATE MADLIB_SCHEMA.weighted_sample(
    /*+ value */ BIGINT,
    /*+ weight */ DOUBLE PRECISION
) (
    -- Per-row state update and the type that carries the running state.
    SFUNC = MADLIB_SCHEMA.weighted_sample_transition_int64,
    STYPE = MADLIB_SCHEMA.bytea8,
    -- Converts the accumulated state into the BIGINT result.
    FINALFUNC = MADLIB_SCHEMA.weighted_sample_final_int64,
    -- The merge function is only registered on builds other than plain PostgreSQL.
    m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.weighted_sample_merge_int64,')
    -- The initial state is the empty string literal.
    INITCOND = ''
);
---------------------------------------------------------------------------
/**
 * @brief Transition function of the weighted_sample aggregate (array values)
 *
 * Registered as the SFUNC of
 * weighted_sample(DOUBLE PRECISION[], DOUBLE PRECISION).
 * The body lives in the C shared library. Because the function is STRICT,
 * the aggregate skips rows in which value or weight is NULL.
 *
 * @param state Running aggregate state
 * @param value Array value of the current row
 * @param weight Weight of the current row
 * @return Updated aggregate state
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weighted_sample_transition_vector(
    state   MADLIB_SCHEMA.bytea8,
    value   DOUBLE PRECISION[],
    weight  DOUBLE PRECISION
)
RETURNS MADLIB_SCHEMA.bytea8
AS 'MODULE_PATHNAME'
LANGUAGE C VOLATILE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
/**
 * @brief Merge function of the weighted_sample aggregate (array values)
 *
 * Takes two aggregate states and returns a single state. It is attached to
 * the aggregate as its prefunc on builds other than plain PostgreSQL.
 * The body lives in the C shared library.
 *
 * @param state_left First aggregate state
 * @param state_right Second aggregate state
 * @return Combined aggregate state
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weighted_sample_merge_vector(
    state_left   MADLIB_SCHEMA.bytea8,
    state_right  MADLIB_SCHEMA.bytea8
)
RETURNS MADLIB_SCHEMA.bytea8
AS 'MODULE_PATHNAME'
LANGUAGE C VOLATILE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
/**
 * @brief Final function of the weighted_sample aggregate (array values)
 *
 * Registered as the FINALFUNC of
 * weighted_sample(DOUBLE PRECISION[], DOUBLE PRECISION): it turns the
 * accumulated state into the DOUBLE PRECISION[] result of the aggregate.
 * The body lives in the C shared library.
 *
 * @param state Aggregate state after all rows have been processed
 * @return The DOUBLE PRECISION[] result of the aggregate
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.weighted_sample_final_vector(
    state  MADLIB_SCHEMA.bytea8
)
RETURNS DOUBLE PRECISION[]
AS 'MODULE_PATHNAME'
LANGUAGE C IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
-- Overload of weighted_sample whose value argument is a DOUBLE PRECISION array.
DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.weighted_sample(DOUBLE PRECISION[], DOUBLE PRECISION);
CREATE AGGREGATE MADLIB_SCHEMA.weighted_sample(
    /*+ value */ DOUBLE PRECISION[],
    /*+ weight */ DOUBLE PRECISION
) (
    -- Per-row state update and the type that carries the running state.
    SFUNC = MADLIB_SCHEMA.weighted_sample_transition_vector,
    STYPE = MADLIB_SCHEMA.bytea8,
    -- Converts the accumulated state into the DOUBLE PRECISION[] result.
    FINALFUNC = MADLIB_SCHEMA.weighted_sample_final_vector,
    -- The merge function is only registered on builds other than plain PostgreSQL.
    m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.weighted_sample_merge_vector,')
    -- The initial state is the empty string literal.
    INITCOND = ''
);
---------------------------------------------------------------------------
/**
 * @brief Random integer generator implemented in the C shared library
 *
 * NOTE(review): judging by the name, this presumably draws from a Poisson
 * distribution with the given mean. Confirm against the C implementation.
 * Declared VOLATILE, so it is re-evaluated on every call. STRICT makes a
 * NULL argument yield NULL without calling the C code.
 *
 * @param mean Mean parameter passed to the generator
 * @return An integer produced by the generator
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.poisson_random(
    mean  DOUBLE PRECISION
)
RETURNS INTEGER
AS 'MODULE_PATHNAME'
LANGUAGE C VOLATILE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
---------------------------------------------------------------------------
/**
 * @brief Random floating-point generator implemented in the C shared library
 *
 * NOTE(review): judging by the name, this presumably draws from a gamma
 * distribution parameterized by alpha. Confirm the exact parameterization
 * against the C implementation.
 * Declared VOLATILE, so it is re-evaluated on every call. STRICT makes a
 * NULL argument yield NULL without calling the C code.
 *
 * @param alpha Parameter passed to the generator
 * @return A double precision number produced by the generator
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.gamma_random(
    alpha  DOUBLE PRECISION
)
RETURNS DOUBLE PRECISION
AS 'MODULE_PATHNAME'
LANGUAGE C VOLATILE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
---------------------------------------------------------------------------
/**
 * @brief Sample an index from an array of weights
 *
 * Implemented in the C shared library. STRICT makes a NULL argument yield
 * NULL without calling the C code.
 *
 * The function is declared VOLATILE, like the other random generators in
 * this file. A sampling function can return a different result on each
 * call with the same argument. Marking it IMMUTABLE would allow the planner
 * to pre-evaluate a call with constant arguments and reuse that one result,
 * which defeats the sampling.
 *
 * NOTE(review): whether the returned index is zero-based or one-based, and
 * how non-positive weights are handled, is decided by the C implementation.
 * Confirm there.
 *
 * @param weights (unnamed, DOUBLE PRECISION[]) Array of weights
 * @return Index of the sampled array element
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.index_weighted_sample(
    DOUBLE PRECISION[]
)
RETURNS INTEGER
AS 'MODULE_PATHNAME'
LANGUAGE C VOLATILE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');