blob: 697498586d05e8cd0a80470a4258e7814594fe05 [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* @file lsvm.sql_in
*
* @brief SQL functions for linear support vector machines
* @sa For an introduction to Support vector machines (SVMs) and related kernel
* methods, see the module description \ref grp_kernmach.
*
*//* ------------------------------------------------------------------------*/
m4_include(`SQLCommon.m4')
-- Composite type pairing a coefficient array with a scalar loss value.
-- The type is dropped first (CASCADE also removes dependent objects) so
-- that it can be re-created from scratch.
DROP TYPE IF EXISTS MADLIB_SCHEMA.linear_svm_result CASCADE;
CREATE TYPE MADLIB_SCHEMA.linear_svm_result AS (
    coefficients    double precision[],
    loss            double precision
);
--------------------------------------------------------------------------
-- create SQL functions for IGD optimizer
--------------------------------------------------------------------------
-- State-transition function (SFUNC) of the linear_svm_igd_step aggregates.
-- It must not be declared STRICT: previous_state is NULL on the initial
-- iteration and the function still has to be invoked in that case.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_svm_igd_transition(
    state           double precision[],
    ind_var         double precision[],
    dep_var         boolean,
    previous_state  double precision[],
    dimension       integer,
    stepsize        double precision,
    reg             double precision
)
RETURNS double precision[]
AS 'MODULE_PATHNAME'
LANGUAGE C
IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL');
-- Combines two transition states into one. Registered as the prefunc of
-- linear_svm_igd_step on platforms other than PostgreSQL.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_svm_igd_merge(
    state1  double precision[],
    state2  double precision[]
)
RETURNS double precision[]
AS 'MODULE_PATHNAME'
LANGUAGE C
IMMUTABLE
STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL');
-- Final function (FINALFUNC) of the linear_svm_igd_step aggregates: maps the
-- accumulated transition state to the array returned by the aggregate.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.linear_svm_igd_final(
    state   double precision[]
)
RETURNS double precision[]
AS 'MODULE_PATHNAME'
LANGUAGE C
IMMUTABLE
STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL');
/**
 * @internal
 * @brief Serial variant of the aggregate that performs one iteration of the
 *        incremental gradient method for linear support vector machines.
 *        Unlike linear_svm_igd_step, it never declares a merge function.
 */
DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.linear_svm_igd_step_serial(
    /*+ ind_var */          double precision[],
    /*+ dep_var */          boolean,
    /*+ previous_state */   double precision[],
    /*+ dimension */        integer,
    /*+ stepsize */         double precision,
    /*+ reg */              double precision
);
CREATE AGGREGATE MADLIB_SCHEMA.linear_svm_igd_step_serial(
    /*+ ind_var */          double precision[],
    /*+ dep_var */          boolean,
    /*+ previous_state */   double precision[],
    /*+ dimension */        integer,
    /*+ stepsize */         double precision,
    /*+ reg */              double precision
) (
    SFUNC = MADLIB_SCHEMA.linear_svm_igd_transition,
    STYPE = double precision[],
    FINALFUNC = MADLIB_SCHEMA.linear_svm_igd_final,
    -- initial transition state: an array of five zeros
    INITCOND = '{0,0,0,0,0}'
);
/**
 * @internal
 * @brief Aggregate that performs one iteration of the incremental gradient
 *        method for linear support vector machines. On platforms other than
 *        PostgreSQL it additionally registers linear_svm_igd_merge as its
 *        merge function.
 */
DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.linear_svm_igd_step(
    /*+ ind_var */          double precision[],
    /*+ dep_var */          boolean,
    /*+ previous_state */   double precision[],
    /*+ dimension */        integer,
    /*+ stepsize */         double precision,
    /*+ reg */              double precision
);
CREATE AGGREGATE MADLIB_SCHEMA.linear_svm_igd_step(
    /*+ ind_var */          double precision[],
    /*+ dep_var */          boolean,
    /*+ previous_state */   double precision[],
    /*+ dimension */        integer,
    /*+ stepsize */         double precision,
    /*+ reg */              double precision
) (
    SFUNC = MADLIB_SCHEMA.linear_svm_igd_transition,
    STYPE = double precision[],
    m4_ifdef(`__POSTGRESQL__', `', `prefunc=MADLIB_SCHEMA.linear_svm_igd_merge,')
    FINALFUNC = MADLIB_SCHEMA.linear_svm_igd_final,
    -- initial transition state: an array of five zeros
    INITCOND = '{0,0,0,0,0}'
);
-- Internal helper: takes two state arrays and returns a single scalar
-- computed from them by the C implementation.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_linear_svm_igd_distance(
    /*+ state1 */   double precision[],
    /*+ state2 */   double precision[]
)
RETURNS double precision
AS 'MODULE_PATHNAME'
LANGUAGE C
IMMUTABLE
STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL');
-- Internal helper: converts a state array into a linear_svm_result record
-- holding the coefficients and the loss.
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.internal_linear_svm_igd_result(
    /*+ state */    double precision[]
)
RETURNS MADLIB_SCHEMA.linear_svm_result
AS 'MODULE_PATHNAME'
LANGUAGE C
IMMUTABLE
STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL');
-- Record type holding the outcome of the linear SVM SGD algorithm.
-- Dropped with CASCADE first so that the definition below always wins.
DROP TYPE IF EXISTS MADLIB_SCHEMA.lsvm_sgd_model_rec CASCADE;
CREATE TYPE MADLIB_SCHEMA.lsvm_sgd_model_rec AS (
    weights     float8[],   -- weight vector of the model
    wdiv        float8,     -- factor by which the weights are scaled
    wbias       float8,     -- bias (offset) term of the linear model
    ind_dim     int,        -- dimensionality of an individual data point
    inds        int,        -- count of individuals processed so far
    cum_err     int         -- accumulated error
);
-- Summary row type returned by the linear classifier learning functions.
-- Dropped with CASCADE first so that the definition below always wins.
DROP TYPE IF EXISTS MADLIB_SCHEMA.lsvm_sgd_result CASCADE;
CREATE TYPE MADLIB_SCHEMA.lsvm_sgd_result AS (
    model_table text,       -- name of the table holding the model
    model_name  text,       -- name of the model
    inds        int,        -- count of individuals processed
    ind_dim     int,        -- dimensionality of an individual data point
    cum_err     float8,     -- accumulated error
    wdiv        float8,     -- factor by which the weights are scaled
    wbias       float8      -- offset of the classifier
);
/**
 * @brief Linear support vector classification with every option specified
 *
 * This is the full-argument form. The shorter overloads in this file
 * delegate to it and supply the default values quoted below.
 *
 * @param input_table Name of the table/view holding the training data
 * @param model_table Name of the table in which the learned model is stored
 * @param parallel Flag indicating whether the system should learn multiple models in parallel (Default: False)
 * @param verbose Verbosity of reporting (Default: False)
 * @param eta Initial learning rate in (0,1] (Default: 0.1)
 * @param reg Regularization parameter, often chosen by cross-validation (Default: 0.001)
 * @param max_iter Maximum number of iterations of the learning algorithm (Default: 100)
 *
 * @return A summary of the learning process
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.lsvm_classification(
    input_table text,
    model_table text,
    parallel    bool,
    verbose     bool,
    eta         float8,
    reg         float8,
    max_iter    int
)
RETURNS SETOF MADLIB_SCHEMA.lsvm_sgd_result
AS $$
PythonFunction(kernel_machines, lsvm, lsvm_classification)
$$
LANGUAGE plpythonu
VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
/**
 * @brief Linear support vector classification with the default iteration limit
 *
 * Delegates to the seven-argument form, passing 100 as max_iter.
 *
 * @param input_table Name of the table/view holding the training data
 * @param model_table Name of the table in which the learned model is stored
 * @param parallel Flag indicating whether the system should learn multiple models in parallel (Default: False)
 * @param verbose Verbosity of reporting (Default: False)
 * @param eta Initial learning rate in (0,1] (Default: 0.1)
 * @param reg Regularization parameter, often chosen by cross-validation (Default: 0.001)
 *
 * @return A summary of the learning process
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.lsvm_classification(
    input_table text,
    model_table text,
    parallel    bool,
    verbose     bool,
    eta         float8,
    reg         float8
)
RETURNS SETOF MADLIB_SCHEMA.lsvm_sgd_result
AS $$
    SELECT MADLIB_SCHEMA.lsvm_classification(
        $1, $2, $3, $4, $5, $6,
        100
    );
$$
LANGUAGE sql
VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
/**
 * @brief Linear support vector classification with default learning settings
 *
 * Delegates to the six-argument form with verbose = false, eta = 0.1 and
 * reg = 0.001.
 *
 * @param input_table Name of the table/view holding the training data
 * @param model_table Name of the table in which the learned model is stored
 * @param parallel Flag indicating whether the system should learn multiple models in parallel (Default: false)
 *
 * @return A summary of the learning process
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.lsvm_classification(
    input_table text,
    model_table text,
    parallel    bool
)
RETURNS SETOF MADLIB_SCHEMA.lsvm_sgd_result
AS $$
    SELECT MADLIB_SCHEMA.lsvm_classification(
        $1, $2, $3,
        false, 0.1, 0.001
    );
$$
LANGUAGE sql
VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
/**
 * @brief Linear support vector classification using all default options
 *
 * Delegates to the three-argument form with parallel = false.
 *
 * @param input_table Name of the table/view holding the training data
 * @param model_table Name of the table in which the learned model is stored
 *
 * @return A summary of the learning process
 *
 * @internal
 * @sa This function is a wrapper for lsvm.lsvm_classification().
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.lsvm_classification(
    input_table text,
    model_table text
)
RETURNS SETOF MADLIB_SCHEMA.lsvm_sgd_result
AS $$
    SELECT MADLIB_SCHEMA.lsvm_classification(
        $1, $2,
        false
    );
$$
LANGUAGE sql
VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
/**
 * @brief Applies a learned linear support-vector model to every data point
 *        stored in a table
 *
 * @param input_table Name of the table/view that contains the data points to score
 * @param data_col Name of the column in input_table that holds the data points
 * @param id_col Name of the column in input_table that holds the integer identifier of each data point
 * @param model_table Name of the table in which the learned model is stored
 * @param output_table Name of the table that receives the results
 * @param parallel Deprecated and ignored boolean flag. (Default: NULL)
 *
 * @return Textual summary of the algorithm run
 *
 * @internal
 * @sa This function is a wrapper for lsvm.lsvm_predict_batch().
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.lsvm_predict_batch(
    input_table     text,
    data_col        text,
    id_col          text,
    model_table     text,
    output_table    text,
    parallel        bool
)
RETURNS text
AS $$
PythonFunction(kernel_machines, lsvm, lsvm_predict_batch)
$$
LANGUAGE plpythonu
VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
/**
 * @brief Scores the data points stored in a table using a learned linear support-vector model
 *
 * Convenience overload without the deprecated parallel flag. It delegates to
 * the six-argument form and passes NULL for that flag.
 *
 * @param input_table Name of table/view containing the data points to be scored
 * @param data_col Name of column in input_table containing the data points
 * @param id_col Name of column in input_table containing the integer identifier of data points
 * @param model_table Name of table where the learned model to be used is stored
 * @param output_table Name of table to store the results
 *
 * @return Textual summary of the algorithm run
 *
 * @internal
 * @sa This function is a wrapper for lsvm.lsvm_predict_batch().
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.lsvm_predict_batch(
    input_table     text,
    data_col        text,
    id_col          text,
    model_table     text,
    output_table    text
)
RETURNS text
AS $$
    -- The NULL is cast explicitly so that overload resolution always picks
    -- the variant whose sixth argument is the bool parallel flag, instead of
    -- depending on how an untyped literal happens to be coerced.
    SELECT MADLIB_SCHEMA.lsvm_predict_batch(
        $1, $2, $3, $4, $5,
        CAST(NULL AS bool)
    );
$$
LANGUAGE sql
VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA');
/**
 * @brief Applies a linear support-vector model to a single data point
 *
 * Implemented by the C symbol linear_svm_igd_predict. Being STRICT, the
 * function yields NULL whenever either argument is NULL.
 *
 * @param coefficients Coefficients of the learned model
 * @param ind_var Independent variables of the instance to predict
 *
 * @return The prediction, documented as being 1 or -1
 */
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.lsvm_predict(
    coefficients    double precision[],
    ind_var         double precision[]
)
RETURNS double precision
AS 'MODULE_PATHNAME', 'linear_svm_igd_predict'
LANGUAGE C
IMMUTABLE
STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL');