| /* ----------------------------------------------------------------------- *//** |
| * |
| * @file online_sv.sql_in |
| * |
| * @brief SQL functions for support vector machines |
| * @sa For an introduction to Support vector machines (SVMs) and related kernel |
| * methods, see the module description \ref grp_kernmach. |
| * |
| *//* ------------------------------------------------------------------------*/ |
| |
| m4_include(`SQLCommon.m4') |
| |
| /** |
| @addtogroup grp_kernmach |
| |
| |
| <div class="toc"><b>Contents</b> |
| <ul> |
| <li><a href="#learn">Regression Learning Function</a></li> |
| <li><a href="#classify">Classification Learning Function</a></li> |
| <li><a href="#novelty">Novelty Detection Functions</a></li> |
| <li><a href="#predict">Prediction Functions</a></li> |
| <li><a href="#notes">Notes</a></li> |
| <li><a href="#examples">Examples</a></li> |
| <li><a href="#literature">Literature</a></li> |
| <li><a href="#related">Related Topics</a></li> |
| </ul> |
| </div> |
| |
| @brief Generates support vector machines for classification, regression, and novelty detection. |
| |
| \warning <em> This MADlib method is still in early stage development. There may be some |
| issues that will be addressed in a future version. Interface and implementation |
| is subject to change. </em> |
| |
| Support vector machines (SVMs) and related kernel methods have been among |
| the most popular and well-studied machine learning techniques of the |
| past 15 years, with an amazing number of innovations and applications. |
| |
| In a nutshell, an SVM model \f$f(x)\f$ takes the form of |
| \f[ |
| f(x) = \sum_i \alpha_i k(x_i,x), |
| \f] |
| where each \f$ \alpha_i \f$ is a real number, each \f$ \boldsymbol x_i \f$ is a |
| data point from the training set (called a support vector), and |
| \f$ k(\cdot, \cdot) \f$ is a kernel function that measures how "similar" two |
| objects are. In regression, \f$ f(\boldsymbol x) \f$ is the regression function |
| we seek. In classification, \f$ f(\boldsymbol x) \f$ serves as |
| the decision boundary; so for example in binary classification, the predictor |
| can output class 1 for object \f$x\f$ if \f$ f(\boldsymbol x) \geq 0 \f$, and class |
| 2 otherwise. |
| |
| In the case when the kernel function \f$ k(\cdot, \cdot) \f$ is the standard |
| inner product on vectors, \f$ f(\boldsymbol x) \f$ is just an alternative way of |
| writing a linear function |
| \f[ |
| f'(\boldsymbol x) = \langle \boldsymbol w, \boldsymbol x \rangle, |
| \f] |
| where \f$ \boldsymbol w \f$ is a weight vector having the same dimension as |
| \f$ \boldsymbol x \f$. One of the key points of SVMs is that we can use more |
| fancy kernel functions to efficiently learn linear models in high-dimensional |
| feature spaces, since \f$ k(\boldsymbol x_i, \boldsymbol x_j) \f$ can be |
| understood as an efficient way of computing an inner product in the feature |
| space: |
| \f[ |
| k(\boldsymbol x_i, \boldsymbol x_j) |
| = \langle \phi(\boldsymbol x_i), \phi(\boldsymbol x_j) \rangle, |
| \f] |
| where \f$ \phi(\boldsymbol x) \f$ projects \f$ \boldsymbol x \f$ into a |
| (possibly infinite-dimensional) feature space. |
| |
| There are many algorithms for learning kernel machines. This module |
| implements the class of online learning with kernels algorithms |
| described in Kivinen et al. [1]. It also includes the incremental gradient |
| descent (IGD) method Feng et al. [3] for learning linear SVMs with the Hinge |
| loss \f$l(z) = \max(0, 1-z)\f$. See also the book Scholkopf and Smola [2] for |
| more details of SVMs in general. |
| |
| The IGD implementation is based on Bismarck project in University of Wisconsin |
| (http://hazy.cs.wisc.edu/hazy/victor/bismarck/). The methods introduced in [1] |
| are implemented according to their original descriptions, except that |
| we only update the support vector model when we make a significant |
| error. The original algorithms in [1] update the support vector model at |
| every step, even when no error was made, in the name of |
| regularization. For practical purposes, and this is verified |
| empirically to a certain degree, updating only when necessary is both |
| faster and better from a learning-theoretic point of view, at least in |
| the i.i.d. setting. |
| |
| Methods for classification, regression and novelty detection are |
| available. Multiple instances of the algorithms can be executed |
| in parallel on different subsets of the training data. The resultant |
| support vector models can then be combined using standard techniques |
| like averaging or majority voting. |
| |
| Training data points are accessed via a table or a view. The support |
| vector models can also be stored in tables for fast execution. |
| |
| |
| @anchor learn |
| @par Regression Learning Function |
| |
| Regression learning is achieved through the following function: |
| <pre class="syntax"> |
| svm_regression( input_table, |
| model_table, |
| parallel, |
| kernel_func, |
| verbose DEFAULT false, |
| eta DEFAULT 0.1, |
| nu DEFAULT 0.005, |
| slambda DEFAULT 0.05, |
| kernel_param DEFAULT 1.0 |
| )</pre> |
| |
| For classification and regression, the training table/view is expected to be of the following form (the array size of <em>ind</em> must not be greater than 102,400.):\n |
| <pre>{TABLE|VIEW} input_table ( |
| ... |
| id INT, |
| ind FLOAT8[], |
| label FLOAT8, |
| ... |
| )</pre> |
| |
| For novelty detection, the label field is not required. |
| Also note that the column names of input_table requires to be exactly the same as described above. This limitation will be removed when this module graduates from early development stage. |
| |
| @anchor classify |
| @par Classification Learning Function |
| |
| Classification learning is achieved through the following two functions: |
| |
| - Learn linear SVM(s) using IGD [3]. |
| <pre class="syntax"> |
| lsvm_classification( input_table, |
| model_table, |
| parallel DEFAULT false, |
| verbose DEFAULT false, |
| eta DEFAULT 0.1, |
| reg DEFAULT 0.001, |
| max_iter DEFAULT 100 |
| ) |
| </pre> |
| Note that, as any gradient descent methods, IGD will converge with a greater |
| eta (stepsize), thus faster, if the input training data is well-conditioned. |
| We highly recommend user to perform data preparation, such that the mean value |
| of each feature column is 0 and standard error is 1, and append an extra |
| feature with constant value 1 for intercept term. We plan to provide a |
| function for this when this module graduates from early stage development. |
| |
| - Learn linear or non-linear SVM(s) using the method described in [1]. |
| <pre class="syntax"> |
| svm_classification( input_table, |
| model_table, |
| parallel, |
| kernel_func, |
| verbose DEFAULT false, |
| eta DEFAULT 0.1, |
| nu DEFAULT 0.005, |
| kernel_param DEFAULT 1.0 |
| ) |
| </pre> |
| |
| @anchor novelty |
| @par Novelty Detection Function |
| |
| Novelty detection is achieved through the following function: |
| <pre class="syntax"> |
| svm_novelty_detection( input_table, |
| model_table, |
| parallel, |
| kernel_func, |
| verbose DEFAULT false, |
| eta DEFAULT 0.1, |
| nu DEFAULT 0.005, |
| kernel_param DEFAULT 1.0 |
| ) |
| </pre> |
| |
| Assuming the model_table parameter takes on value 'model', each learning function will produce two tables |
| as output: 'model' and 'model_param'. |
| The first contains the support vectors of the model(s) learned. |
| The second contains the parameters of the model(s) learned, which include information like the kernel function |
| used and the value of the intercept, if there is one. |
| |
| @anchor predict |
| @par Prediction Functions |
| |
| - To make predictions on a single data point x using a single model learned previously, we use the function |
| <pre class="syntax"> |
| svm_predict_batch( input_table, |
| data_col, |
| id_col, |
| model_table, |
| output_table, |
| parallel |
| ) |
| </pre> |
| If the \c parallel parameter is true, then each data point in the input table will have multiple |
| predicted values corresponding to the number of models learned in |
| |
| - If the model is produced by the lsvm_classification() function, use |
| the following prediction function instead. |
| <pre class="syntax"> |
| lsvm_predict_batch( input_table, |
| data_col, |
| id_col, |
| model_table, |
| output_table, |
| ) |
| </pre> |
| |
| - Note that, to make predictions on a subset of data points stored in a table, |
| a separated view or table needs to be created ahead of time: |
| <pre class="example"> |
| -- create subset as a view |
| CREATE VIEW subset AS SELECT * FROM input_table WHERE id <= 100; |
| -- prediction on the subset |
| SELECT svm_predict_batch('subset', 'ind', 'id', |
| 'svm_model', 'subset_svm_predict', false); |
| -- prediction using linear SVMs |
| SELECT lsvm_predict_batch('subset', 'ind', 'id', |
| 'lsvm_model', 'subset_lsvm_predict'); |
| </pre> |
| |
| @anchor notes |
| @par Notes |
| |
| The <tt>kernel_func</tt> argument of <tt>svm_classification</tt>, |
| <tt>svm_regression</tt>, and <tt>svm_novelty_detection</tt> can only accept a |
| kernel function in the following form: |
| |
| <pre class="syntax"> |
| FLOAT8 kernel_function(FLOAT8[], FLOAT8[], FLOAT8) |
| </pre> |
| |
| The first two parameters are feature vectors, and the third one is a control |
| parameter for the kernel function. The value of the control parameter must be |
| set throught the <tt>kernel_param</tt> argument of <tt>svm_classification</tt>, |
| <tt>svm_regression</tt>, and <tt>svm_novelty_detection</tt>. |
| |
| Currently, three kernel functions have been implemented: linear or dot product |
| (\ref svm_dot), polynomial (\ref svm_polynomial) and Gaussian (\ref |
| svm_gaussian) kernels. Note that for the dot product kernel, it actually only |
| requires two FLOAT8[] parameters. To be compliant with the requirements for the |
| kernel function, we have an overloaded version of <tt>svm_dot</tt> which |
| accepts two FLOAT8[] and one FLOAT8 and returns a FLOAT8, but the FLOAT8 parameter |
| is simply a placeholder and will be ignored. |
| |
| With the HAWQ database, only the above pre-built kernel functions can be |
| used. With the Greenplum database and PostgreSQL database, one can use any |
| user-defined function as long as it conforms to the requirements for the kernel |
| function. |
| |
| @anchor examples |
| @examp |
| |
| As a general first step, prepare and populate an input table/view with the following structure: |
| <pre class="example"> |
| CREATE TABLE/VIEW my_schema.my_input_table( |
| id INT, -- point ID |
| ind FLOAT8[], -- data point |
| label FLOAT8 -- label of data point |
| ); |
| </pre> |
| The label field is not required for novelty detection. |
| |
| <strong>Example usage for regression</strong>: |
| -# Randomly generate 1000 5-dimensional data labelled using the simple target function. |
| <pre class="example"> |
| t(x) = if x[5] = 10 then 50 else if x[5] = -10 then 50 else 0; |
| </pre> |
| and store that in the my_schema.my_train_data table as follows: |
| <pre class="example"> |
| SELECT madlib.svm_generate_reg_data( |
| 'my_schema.my_train_data', 1000, 5); |
| </pre> |
| |
| -# Learn a regression model and store the resultant model under the name 'myexp'. |
| <pre class="example"> |
| SELECT madlib.svm_regression('my_schema.my_train_data', |
| 'myexp1', false, 'madlib.svm_dot'); |
| </pre> |
| |
| -# To learn multiple support vector models, we replace the learning step above by |
| <pre class="example"> |
| SELECT madlib.svm_regression('my_schema.my_train_data', |
| 'myexp2', true, 'madlib.svm_dot'); |
| </pre> |
| |
| -# We can also predict the labels of data points stored in a table. For example, we can execute the following: |
| <pre class="example"> |
| -- prepare test data |
| CREATE TABLE madlib.svm_reg_test AS |
| SELECT id, ind |
| FROM my_schema.my_train_data |
| LIMIT 20; |
| -- prediction using a single model |
| SELECT madlib.svm_predict_batch('madlib.svm_reg_test', 'ind', 'id', |
| 'myexp1', 'madlib.svm_reg_output1', false); |
| SELECT * FROM madlib.svm_reg_output1; |
| -- prediction using multiple models |
| SELECT madlib.svm_predict_batch('madlib.svm_reg_test', 'ind', 'id', |
| 'myexp2', 'madlib.svm_reg_output2', true); |
| SELECT * FROM madlib.svm_reg_output2; |
| </pre> |
| |
| <strong>Example usage for classification:</strong> |
| -# Randomly generate training and testing data labelled by the simple target function. |
| <pre class="example"> |
| t(x) = if x[1] > 0 and x[2] < 0 then 1 else -1; |
| </pre> |
| and store that in tables as follows: |
| <pre class="example"> |
| SELECT madlib.svm_generate_cls_data( |
| 'my_schema.my_train_data', 2000, 5); |
| SELECT madlib.svm_generate_cls_data( |
| 'my_schema.my_test_data', 3000, 5); |
| </pre> |
| |
| -# Learn a classification model and store the resultant model the table 'myexpc'. |
| <pre class="example"> |
| SELECT madlib.svm_classification('my_schema.my_train_data', |
| 'myexpc', false, 'madlib.svm_dot'); |
| </pre> |
| |
| -# Start using the model to predict the labels of testing data points. |
| <pre class="example"> |
| SELECT madlib.svm_predict_batch('my_schema.my_test_data', 'ind', 'id', |
| 'myexpc', 'my_schema.svm_cls_output1', false); |
| </pre> |
| |
| -# To learn multiple support vector models, replace the model-building and prediction steps above. |
| <pre class="example"> |
| -- training |
| SELECT madlib.svm_classification('my_schema.my_train_data', |
| 'myexpc', true, 'madlib.svm_dot'); |
| -- predicting |
| SELECT madlib.svm_predict_batch('my_schema.my_test_data', 'ind', 'id', |
| 'myexpc', 'my_schema.svm_cls_output1', true); |
| </pre> |
| |
| -# To learn a linear support vector model using IGD [3], replace the model-building and prediction steps. |
| <pre class="example"> |
| -- training |
| SELECT madlib.lsvm_classification( |
| 'my_schema.my_train_data', 'my_lsvm'); |
| -- predicting |
| SELECT madlib.lsvm_predict_batch('my_schema.my_test_data', |
| 'ind', 'id', 'my_lsvm', 'my_lsvm_predict'); |
| </pre> |
| |
| |
| <strong>Example usage for novelty detection:</strong> |
| |
| -# Randomly generate 100 2-dimensional data (the normal cases) and store that in the my_schema.my_train_data table. |
| <pre class="example"> |
| SELECT madlib.svm_generate_nd_data( |
| 'my_schema.my_train_data', 100, 2); |
| </pre> |
| |
| -# Learning and predicting using a single novelty detection model: |
| <pre class="example"> |
| SELECT madlib.svm_novelty_detection( 'my_schema.my_train_data', |
| 'myexpnd', |
| false, |
| 'madlib.svm_dot' |
| ); |
| SELECT * FROM myexpnd; |
| </pre> |
| |
| -# Learning and predicting using multiple models can be done as follows: |
| <pre class="example"> |
| SELECT madlib.svm_novelty_detection( 'my_schema.my_train_data', |
| 'myexpnd', |
| true, |
| 'madlib.svm_dot' |
| ); |
| SELECT * FROM myexpnd; |
| </pre> |
| |
| |
| <strong>Model cleanup:</strong> |
| To drop all tables pertaining to the model, use: |
| <pre class="example"> |
| SELECT svm_drop_model('model_table'); |
| </pre> |
| |
| @anchor literature |
| @literature |
| |
| [1] Jyrki Kivinen, Alexander J. Smola, and Robert C. Williamson: <em>Online |
| Learning with Kernels</em>, IEEE Transactions on Signal Processing, 52(8), |
| 2165-2176, 2004. |
| |
| [2] Bernhard Scholkopf and Alexander J. Smola: <em>Learning with Kernels: |
| Support Vector Machines, Regularization, Optimization, and Beyond</em>, |
| MIT Press, 2002. |
| |
| [3] X. Feng, A. Kumar, B. Recht, and C. Ré: <em>Towards a unified architecture |
| for in-RDBMS analytics</em>, In SIGMOD Conference, 2012. |
| |
| @anchor related |
| @par Related Topics |
| |
| File online_sv.sql_in and lsvm.sql_in documenting the SQL functions. |
| |
| @internal |
| @sa namespace online_sv (documenting the implementation in Python) |
| @endinternal |
| |
| */ |
| |
| |
| |
| -- The following is the structure to record the results of a learning process. |
| -- We work with arrays of float8 for now; we'll extend the code to work with sparse vectors next. |
| -- |
| DROP TYPE IF EXISTS MADLIB_SCHEMA.svm_model_rec CASCADE; |
| CREATE TYPE MADLIB_SCHEMA.svm_model_rec AS ( |
| inds int, -- number of individuals processed |
| cum_err float8, -- cumulative error |
| epsilon float8, -- the size of the epsilon tube around the hyperplane, adaptively adjusted by algorithm |
| rho float8, -- classification margin |
| b float8, -- classifier offset |
| nsvs int, -- number of support vectors |
| ind_dim int, -- the dimension of the individuals |
| weights float8[], -- the weight of the support vectors |
| individuals float8[], -- the array of support vectors, represented as a 1-D array |
| kernel_oid oid -- OID of kernel function |
| ); |
| |
| -- The following is the return type of a regression learning process |
| -- |
| DROP TYPE IF EXISTS MADLIB_SCHEMA.svm_reg_result CASCADE; |
| CREATE TYPE MADLIB_SCHEMA.svm_reg_result AS ( |
| model_table text, -- table where the model is stored |
| model_name text, -- model name |
| inds int, -- number of individuals processed |
| cum_err float8, -- cumulative error |
| epsilon float8, -- the size of the epsilon tube around the hyperplane, adaptively adjusted by algorithm |
| b float8, -- classifier offset |
| nsvs int -- number of support vectors |
| ); |
| |
| -- The following is the return type of a classification learning process |
| -- |
| DROP TYPE IF EXISTS MADLIB_SCHEMA.svm_cls_result CASCADE; |
| CREATE TYPE MADLIB_SCHEMA.svm_cls_result AS ( |
| model_table text, -- table where the model is stored |
| model_name text, -- model name |
| inds int, -- number of individuals processed |
| cum_err float8, -- cumulative error |
| rho float8, -- classification margin |
| b float8, -- classifier offset |
| nsvs int -- number of support vectors |
| ); |
| |
| -- The following is the return type of a novelty detection learning process |
| -- |
| DROP TYPE IF EXISTS MADLIB_SCHEMA.svm_nd_result CASCADE; |
| CREATE TYPE MADLIB_SCHEMA.svm_nd_result AS ( |
| model_table text, -- table where the model is stored |
| model_name text, -- model name |
| inds int, -- number of individuals processed |
| rho float8, -- classification margin |
| nsvs int -- number of support vectors |
| ); |
| |
| -- The type for representing support vectors |
| -- |
| DROP TYPE IF EXISTS MADLIB_SCHEMA.svm_support_vector CASCADE; |
| CREATE TYPE MADLIB_SCHEMA.svm_support_vector AS ( id text, weight float8, sv float8[] ); |
| |
| |
| |
| -- Kernel functions are a generalisation of inner products. |
| -- They provide the means by which we can extend linear machines to work in non-linear transformed feature spaces. |
| -- Here are a few standard kernels: dot product, polynomial kernel, Gaussian kernel. |
| -- |
| /** |
| * @brief Dot product kernel function |
| * |
| * @param x The data point \f$ \boldsymbol x \f$ |
| * @param y The data point \f$ \boldsymbol y \f$ |
| * @return Returns dot product of the two data points. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svm_dot(x float8[], y float8[]) RETURNS float8 |
| AS 'MODULE_PATHNAME', 'array_dot' LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svm_dot(x float8[], y float8[], placeholder float8) RETURNS float8 |
| AS 'MODULE_PATHNAME', 'array_dot' LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| /** |
| * @brief Polynomial kernel function |
| * |
| * @param x The data point \f$ \boldsymbol x \f$ |
| * @param y The data point \f$ \boldsymbol y \f$ |
| * @param degree The degree \f$ d \f$ |
| * @return Returns \f$ K(\boldsymbol x,\boldsymbol y)=(\boldsymbol x \cdot \boldsymbol y)^d \f$ |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svm_polynomial(x float8[], y float8[], degree float8) RETURNS float8 |
| AS 'MODULE_PATHNAME', 'svm_polynomial' LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| /** |
| * @brief Gaussian kernel function |
| * |
| * @param x The data point \f$ \boldsymbol x \f$ |
| * @param y The data point \f$ \boldsymbol y \f$ |
| * @param gamma The spread \f$ \gamma \f$ |
| * @return Returns \f$ K(\boldsymbol x,\boldsymbol y)=exp(-\gamma || \boldsymbol x \cdot \boldsymbol y ||^2 ) \f$ |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svm_gaussian(x float8[], y float8[], gamma float8) RETURNS float8 |
| AS 'MODULE_PATHNAME', 'svm_gaussian' LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svm_predict_sub( |
| int, int, float8[], float8[], float8[], text, float8 |
| ) RETURNS float8 AS |
| 'MODULE_PATHNAME', 'svm_predict_sub' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svm_predict( |
| svs MADLIB_SCHEMA.svm_model_rec, |
| ind float8[], |
| kernel text, |
| kernel_param float8 |
| ) RETURNS float8 AS $$ |
| SELECT MADLIB_SCHEMA.svm_predict_sub($1.nsvs, $1.ind_dim, $1.weights, $1.individuals, $2, $3, $4); |
| $$ LANGUAGE SQL |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| -- This is the main online support vector regression learning algorithm. |
| -- The function updates the support vector model as it processes each new training example. |
| -- This function is wrapped in an aggregate function to process all the training examples stored in a table. |
| -- |
| CREATE OR REPLACE FUNCTION |
| MADLIB_SCHEMA.svm_reg_update( |
| svs MADLIB_SCHEMA.svm_model_rec, |
| ind FLOAT8[], |
| label FLOAT8, |
| kernel TEXT, |
| eta FLOAT8, |
| nu FLOAT8, |
| slambda FLOAT8, |
| kernel_param FLOAT8 |
| ) RETURNS MADLIB_SCHEMA.svm_model_rec AS |
| 'MODULE_PATHNAME', 'svm_reg_update' |
| LANGUAGE C STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svm_reg_agg( |
| float8[], float8, text, float8, float8, float8, float8 |
| ); |
| |
| CREATE AGGREGATE MADLIB_SCHEMA.svm_reg_agg( |
| float8[], float8, text, float8, float8, float8, float8 |
| ) ( |
| sfunc = MADLIB_SCHEMA.svm_reg_update, |
| stype = MADLIB_SCHEMA.svm_model_rec, |
| initcond = '(0,0,0,0,0,0,0,{},{},0)' |
| ); |
| |
| -- This is the main online support vector classification learning algorithm. |
| -- The function updates the support vector model as it processes each new training example. |
| -- This function is wrapped in an aggregate function to process all the training examples stored in a table. |
| -- |
| CREATE OR REPLACE FUNCTION |
| MADLIB_SCHEMA.svm_cls_update( |
| svs MADLIB_SCHEMA.svm_model_rec, |
| ind FLOAT8[], |
| label FLOAT8, |
| kernel TEXT, |
| eta FLOAT8, |
| nu FLOAT8, |
| kernel_param FLOAT8 |
| ) RETURNS MADLIB_SCHEMA.svm_model_rec AS |
| 'MODULE_PATHNAME', 'svm_cls_update' |
| LANGUAGE C STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svm_cls_agg( |
| float8[], float8, text, float8, float8, float8); |
| |
| CREATE AGGREGATE MADLIB_SCHEMA.svm_cls_agg( |
| float8[], float8, text, float8, float8, float8 |
| ) ( |
| sfunc = MADLIB_SCHEMA.svm_cls_update, |
| stype = MADLIB_SCHEMA.svm_model_rec, |
| initcond = '(0,0,0,0,0,0,0,{},{},0)' |
| ); |
| |
| -- This is the main online support vector novelty detection algorithm. |
| -- The function updates the support vector model as it processes each new training example. |
| -- In contrast to classification and regression, the training data points have no labels. |
| -- This function is wrapped in an aggregate function to process all the training examples stored in a table. |
| -- |
| CREATE OR REPLACE FUNCTION |
| MADLIB_SCHEMA.svm_nd_update( |
| svs MADLIB_SCHEMA.svm_model_rec, |
| ind FLOAT8[], |
| kernel TEXT, |
| eta FLOAT8, |
| nu FLOAT8, |
| kernel_param FLOAT8) |
| RETURNS MADLIB_SCHEMA.svm_model_rec AS |
| 'MODULE_PATHNAME', 'svm_nd_update' |
| LANGUAGE C STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.svm_nd_agg( |
| float8[], text, float8, float8, float8); |
| |
| CREATE AGGREGATE MADLIB_SCHEMA.svm_nd_agg( |
| float8[], text, float8, float8, float8 |
| ) ( |
| sfunc = MADLIB_SCHEMA.svm_nd_update, |
| stype = MADLIB_SCHEMA.svm_model_rec, |
| initcond = '(0,0,0,0,0,0,0,{},{},0)' |
| ); |
| |
| |
| -- This function stores a MADLIB_SCHEMA.svm_model_rec stored in model_temp_table into the model_table. |
| -- |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svm_store_model( |
| model_temp_table TEXT, |
| model_name TEXT, |
| model_table TEXT |
| ) RETURNS VOID AS $$ |
| |
| sql = "SELECT COUNT(*) FROM " + model_temp_table + " WHERE id = \'" + model_name + "\'"; |
| temp = plpy.execute(sql); |
| if (temp[0]['count'] == 0): |
| plpy.error("No support vector model with name " + model_name + " found."); |
| |
| sql = "SELECT (model).ind_dim, (model).nsvs" \ |
| + " FROM " + model_temp_table + " WHERE id = '" + model_name + "'"; |
| rv = plpy.execute(sql); |
| myind_dim = rv[0]['ind_dim']; |
| mynsvs = rv[0]['nsvs']; |
| |
| if (mynsvs == 0): |
| plpy.error("The specified model has no support vectors and therefore not processed"); |
| |
| idx = 0; |
| for i in range(1,mynsvs+1): |
| idx = myind_dim * (i-1); |
| sql = "INSERT INTO " + model_table \ |
| + " SELECT \'" + model_name + "\', (model).weights[" + str(i) + "], " \ |
| + " (model).individuals[(" + str(idx+1) + "):(" + str(idx) + "+" + str(myind_dim) + ")] " \ |
| + " FROM " + model_temp_table + " WHERE id = \'" + model_name + "\' LIMIT 1"; |
| plpy.execute(sql); |
| |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| /** |
| * @brief Drops all tables pertaining to a model |
| * |
| * @param model_table The table to be dropped. |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svm_drop_model(model_table TEXT) RETURNS VOID AS $$ |
| plpy.execute("drop table if exists " + model_table) |
| plpy.execute("drop table if exists " + model_table + "_param") |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| DROP TYPE IF EXISTS MADLIB_SCHEMA.svm_model_pr CASCADE; |
| CREATE TYPE MADLIB_SCHEMA.svm_model_pr AS ( model text, prediction float8 ); |
| |
| |
| /** |
| * @brief This is the support vector regression function |
| * |
| * @param input_table The name of the table/view with the training data |
| * @param model_table The name of the table under which we want to store the learned model |
| * @param parallel A flag indicating whether the system should learn multiple models in parallel |
| * @param kernel_func Kernel function |
| * @return A summary of the learning process |
| * |
| * @internal |
| * @sa This function is a wrapper for online_sv::svm_regression(). |
| */ |
| CREATE OR REPLACE FUNCTION |
| MADLIB_SCHEMA.svm_regression(input_table text, model_table text, parallel bool, kernel_func text) |
| RETURNS SETOF MADLIB_SCHEMA.svm_reg_result |
| AS $$ |
| |
| PythonFunctionBodyOnly(`kernel_machines', `online_sv') |
| |
| # schema_madlib comes from PythonFunctionBodyOnly |
| return online_sv.svm_regression( schema_madlib, input_table, model_table, parallel, kernel_func); |
| |
| $$ LANGUAGE 'plpythonu' |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| /** |
| * @brief This is the support vector regression function |
| * |
| * @param input_table The name of the table/view with the training data |
| * @param model_table The name of the table under which we want to store the learned model |
| * @param parallel A flag indicating whether the system should learn multiple models in parallel |
| * @param kernel_func Kernel function |
| * @param verbose Verbosity of reporting |
| * @param eta Learning rate in (0,1] |
| * @param nu Compression parameter in (0,1] associated with the fraction of training data that will become support vectors |
| * @param slambda Regularisation parameter |
| * @param kernel_param Default parameter for kernel function |
| * @return A summary of the learning process |
| * |
| * @internal |
| * @sa This function is a wrapper for online_sv::svm_regression(). |
| */ |
| CREATE OR REPLACE FUNCTION |
| MADLIB_SCHEMA.svm_regression( |
| input_table text, |
| model_table text, |
| parallel bool, |
| kernel_func text, |
| verbose bool, |
| eta float8, |
| nu float8, |
| slambda float8, |
| kernel_param float8) |
| RETURNS SETOF MADLIB_SCHEMA.svm_reg_result |
| AS $$ |
| |
| PythonFunctionBodyOnly(`kernel_machines', `online_sv') |
| |
| # schema_madlib comes from PythonFunctionBodyOnly |
| return online_sv.svm_regression(schema_madlib, input_table, model_table, |
| parallel, kernel_func, verbose, eta, nu, |
| slambda, kernel_param); |
| |
| $$ LANGUAGE 'plpythonu' |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| |
| /** |
| * @brief This is the support vector regression function |
| * |
| * @param input_table The name of the table/view with the training data |
| * @param model_table The name of the table under which we want to store the learned model |
| * @param parallel A flag indicating whether the system should learn multiple models in parallel |
| * @param kernel_func Kernel function |
| * @param verbose Verbosity of reporting |
| * @param eta Learning rate in (0,1] |
| * @param nu Compression parameter in (0,1] associated with the fraction of training data that will become support vectors |
| * @param slambda Regularisation parameter |
| * @param kernel_param Default parameter for kernel function |
| * @return A summary of the learning process |
| * |
| * @internal |
| * @sa This function is a wrapper for online_sv::svm_regression(). |
| */ |
| CREATE OR REPLACE FUNCTION |
| MADLIB_SCHEMA.svm_regression( |
| input_table text, |
| model_table text, |
| parallel bool, |
| kernel_func text, |
| verbose bool, |
| eta float8, |
| nu float8, |
| slambda float8 |
| )RETURNS SETOF MADLIB_SCHEMA.svm_reg_result |
| AS $$ |
| SELECT MADLIB_SCHEMA.svm_regression($1, $2, $3, $4, $5, $6, $7, $8, 1.0) |
| |
| $$ LANGUAGE 'sql' |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| /** |
| * @brief This is the support vector classification function |
| * |
| * @param input_table The name of the table/view with the training data |
| * @param model_table The name of the table under which we want to store the learned model |
| * @param parallel A flag indicating whether the system should learn multiple models in parallel |
| * @param kernel_func Kernel function |
| * @return A summary of the learning process |
| * |
| * @internal |
| * @sa This function is a wrapper for online_sv::svm_classification(). |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svm_classification( |
| input_table TEXT, |
| model_table TEXT, |
| parallel BOOL, |
| kernel_func TEXT, |
| kernel_param DOUBLE PRECISION |
| ) RETURNS SETOF MADLIB_SCHEMA.svm_cls_result |
| AS $$ |
| |
| PythonFunctionBodyOnly(`kernel_machines', `online_sv') |
| |
| # schema_madlib comes from PythonFunctionBodyOnly |
| return online_sv.svm_classification(schema_madlib, input_table, |
| model_table, parallel, kernel_func, |
| kernel_param=kernel_param); |
| |
| $$ LANGUAGE 'plpythonu' |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svm_classification( |
| input_table TEXT, |
| model_table TEXT, |
| parallel BOOL, |
| kernel_func TEXT |
| ) RETURNS SETOF MADLIB_SCHEMA.svm_cls_result |
| AS $$ |
| SELECT MADLIB_SCHEMA.svm_classification($1, $2, $3, $4, 1.0); |
| $$ LANGUAGE sql |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| ---------------------------------------------------------------------- |
| |
| /** |
| * @brief This is the support vector classification function |
| * |
| * @param input_table The name of the table/view with the training data |
| * @param model_table The name of the table under which we want to store the learned model |
| * @param parallel A flag indicating whether the system should learn multiple models in parallel |
| * @param kernel_func Kernel function |
| * @param verbose Verbosity of reporting |
| * @param eta Learning rate in (0,1] |
| * @param nu Compression parameter in (0,1] associated with the fraction of training data that will become support vectors |
| * @param kernel_param Default parameter for kernel function |
| * @return A summary of the learning process |
| * |
| * @internal |
| * @sa This function is a wrapper for online_sv::svm_classification(). |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svm_classification( |
| input_table TEXT, |
| model_table TEXT, |
| parallel BOOL, |
| kernel_func TEXT, |
| verbose BOOL, |
| eta FLOAT8, |
| nu FLOAT8, |
| kernel_param DOUBLE PRECISION |
| ) RETURNS SETOF MADLIB_SCHEMA.svm_cls_result |
| AS $$ |
| |
| PythonFunctionBodyOnly(`kernel_machines', `online_sv') |
| |
| # schema_madlib comes from PythonFunctionBodyOnly |
| return online_sv.svm_classification(schema_madlib, input_table, model_table, parallel, |
| kernel_func, verbose, eta, nu, kernel_param); |
| |
| $$ LANGUAGE 'plpythonu' |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| -- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svm_classification( |
| input_table TEXT, |
| model_table TEXT, |
| parallel BOOL, |
| kernel_func TEXT, |
| verbose BOOL, |
| eta FLOAT8, |
| nu FLOAT8 |
| ) RETURNS SETOF MADLIB_SCHEMA.svm_cls_result |
| AS $$ |
| SELECT MADLIB_SCHEMA.svm_classification($1, $2, $3, $4, $5, $6, $7, 1.0); |
| $$ LANGUAGE sql |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| ---------------------------------------------------------------------- |
| |
| /** |
| * @brief This is the support vector novelty detection function. |
| * |
| * @param input_table The name of the table/view with the training data |
| * @param model_table The name of the table under which we want to store the learned model |
| * @param parallel A flag indicating whether the system should learn multiple models in parallel |
| * @param kernel_func Kernel function |
| * @return A summary of the learning process |
| * |
| * @internal |
| * @sa This function is a wrapper for online_sv::svm_novelty_detection(). |
| */ |
| CREATE OR REPLACE FUNCTION |
| MADLIB_SCHEMA.svm_novelty_detection(input_table text, model_table text, parallel bool, kernel_func text) |
| RETURNS SETOF MADLIB_SCHEMA.svm_nd_result |
| AS $$ |
| |
| PythonFunctionBodyOnly(`kernel_machines', `online_sv') |
| |
| # schema_madlib comes from PythonFunctionBodyOnly |
| return online_sv.svm_novelty_detection( schema_madlib, input_table, model_table, parallel, kernel_func); |
| |
| $$ LANGUAGE 'plpythonu' |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| /** |
| * @brief This is the support vector novelty detection function. |
| * |
| * @param input_table The name of the table/view with the training data |
| * @param model_table The name of the table under which we want to store the learned model |
| * @param parallel A flag indicating whether the system should learn multiple models in parallel |
| * @param kernel_func Kernel function |
| * @param verbose Verbosity of reporting |
| * @param eta Learning rate in (0,1] |
| * @param nu Compression parameter in (0,1] associated with the fraction of training data that will become support vectors |
| * @param kernel_param Default parameter for kernel function |
| * @param kernel_param |
| * @return A summary of the learning process |
| * |
| * @internal |
| * @sa This function is a wrapper for online_sv::svm_novelty_detection(). |
| */ |
| CREATE OR REPLACE FUNCTION |
| MADLIB_SCHEMA.svm_novelty_detection( |
| input_table text, |
| model_table text, |
| parallel bool, |
| kernel_func text, |
| verbose bool, |
| eta float8, |
| nu float8, |
| kernel_param float8 |
| ) RETURNS SETOF MADLIB_SCHEMA.svm_nd_result |
| AS $$ |
| |
| PythonFunctionBodyOnly(`kernel_machines', `online_sv') |
| |
| # schema_madlib comes from PythonFunctionBodyOnly |
| return online_sv.svm_novelty_detection(schema_madlib, input_table, |
| model_table, parallel, |
| kernel_func, verbose, eta, nu, |
| kernel_param); |
| |
| $$ LANGUAGE 'plpythonu' |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| /** |
| * @brief This is the support vector novelty detection function. |
| * |
| * @param input_table The name of the table/view with the training data |
| * @param model_table The name of the table under which we want to store the learned model |
| * @param parallel A flag indicating whether the system should learn multiple models in parallel |
| * @param kernel_func Kernel function |
| * @param verbose Verbosity of reporting |
| * @param eta Learning rate in (0,1] |
| * @param nu Compression parameter in (0,1] associated with the fraction of training data that will become support vectors |
| * @param kernel_param Default parameter for kernel function |
| * @param kernel_param |
| * @return A summary of the learning process |
| * |
| * @internal |
| * @sa This function is a wrapper for online_sv::svm_novelty_detection(). |
| */ |
| CREATE OR REPLACE FUNCTION |
| MADLIB_SCHEMA.svm_novelty_detection( |
| input_table text, |
| model_table text, |
| parallel bool, |
| kernel_func text, |
| verbose bool, |
| eta float8, |
| nu float8 |
| ) RETURNS SETOF MADLIB_SCHEMA.svm_nd_result |
| AS $$ |
| SELECT MADLIB_SCHEMA.svm_novelty_detection($1, $2, $3, $4, $5, $6, $7, 1.0) |
| $$ LANGUAGE 'sql' |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| /** |
| * @brief Scores the data points stored in a table using a learned support-vector model |
| * |
| * @param input_table Name of table/view containing the data points to be scored |
| * @param data_col Name of column in input_table containing the data points |
| * @param id_col Name of column in input_table containing the integer identifier of data points |
| * @param model_table Name of table where the learned model to be used is stored |
| * @param output_table Name of table to store the results |
| * @param parallel A flag indicating whether the model to be used was learned in parallel |
| * @return Textual summary of the algorithm run |
| * |
| * @internal |
| * @sa This function is a wrapper for online_sv::svm_predict_batch(). |
| */ |
| CREATE OR REPLACE FUNCTION |
| MADLIB_SCHEMA.svm_predict_batch( |
| input_table text, |
| data_col text, |
| id_col text, |
| model_table text, |
| output_table text, |
| parallel bool) |
| RETURNS TEXT |
| AS $$ |
| |
| PythonFunctionBodyOnly(`kernel_machines', `online_sv') |
| |
| # schema_madlib comes from PythonFunctionBodyOnly |
| return online_sv.svm_predict_batch( input_table, data_col, id_col, model_table, output_table, parallel); |
| |
| $$ LANGUAGE 'plpythonu' |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| -- Generate artificial training data |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__svm_random_ind(d INT) RETURNS float8[] AS $$ |
| DECLARE |
| ret float8[]; |
| BEGIN |
| FOR i IN 1..(d-1) LOOP |
| ret[i] = RANDOM() * 40 - 20; |
| END LOOP; |
| IF (RANDOM() > 0.5) THEN |
| ret[d] = 10; |
| ELSE |
| ret[d] = -10; |
| END IF; |
| RETURN ret; |
| END |
| $$ LANGUAGE plpgsql |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__svm_random_ind2(d INT) RETURNS float8[] AS $$ |
| DECLARE |
| ret float8[]; |
| BEGIN |
| FOR i IN 1..d LOOP |
| ret[i] = RANDOM() * 5 + 10; |
| IF (RANDOM() > 0.5) THEN ret[i] = -ret[i]; END IF; |
| END LOOP; |
| RETURN ret; |
| END |
| $$ LANGUAGE plpgsql |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svm_generate_reg_data(output_table text, num int, dim int) RETURNS VOID AS $$ |
| temp_table = "madlib_temp_" + output_table |
| plpy.execute("DROP TABLE IF EXISTS " + output_table) |
| plpy.execute("DROP TABLE IF EXISTS " + temp_table) |
| plpy.execute(""" |
| CREATE TABLE {temp_table} AS |
| SELECT |
| subq.val AS id, |
| MADLIB_SCHEMA.__svm_random_ind({dim}) AS ind |
| FROM |
| (SELECT generate_series(1, {num}) AS val) subq |
| m4_ifdef(`__POSTGRESQL__', `', `DISTRIBUTED BY (id)')""" |
| .format(temp_table=temp_table, dim=str(dim), num=str(num))) |
| plpy.execute(""" |
| CREATE TABLE {output_table} AS |
| SELECT id, ind, MADLIB_SCHEMA.__svm_target_reg_func(ind) AS label |
| FROM {temp_table} |
| m4_ifdef(`__POSTGRESQL__', `', `DISTRIBUTED BY (id)')""" |
| .format(output_table=output_table, temp_table=temp_table)) |
| plpy.execute("drop table madlib_temp_" + output_table) |
| $$ LANGUAGE 'plpythonu' |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__svm_target_reg_func(ind float8[]) RETURNS float8 AS $$ |
| DECLARE |
| dim int; |
| BEGIN |
| dim = array_upper(ind,1); |
| IF (ind[dim] = 10) THEN RETURN 50; END IF; |
| RETURN -50; |
| END |
| $$ LANGUAGE plpgsql |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svm_generate_cls_data(output_table text, num int, dim int) RETURNS VOID AS $$ |
| temp_table = "madlib_temp_" + output_table |
| plpy.execute("DROP TABLE IF EXISTS " + output_table) |
| plpy.execute("DROP TABLE IF EXISTS " + temp_table) |
| plpy.execute(""" |
| CREATE TABLE {temp_table} AS |
| SELECT |
| subq.val AS id, |
| MADLIB_SCHEMA.__svm_random_ind({dim}) AS ind |
| FROM |
| (SELECT generate_series(1, {num}) AS val) subq |
| m4_ifdef(`__POSTGRESQL__', `', `DISTRIBUTED BY (id)')""" |
| .format(temp_table=temp_table, dim=str(dim), num=str(num))) |
| plpy.execute(""" |
| CREATE TABLE {output_table} AS |
| SELECT id, ind, MADLIB_SCHEMA.__svm_target_cl_func(ind) AS label |
| FROM {temp_table} |
| m4_ifdef(`__POSTGRESQL__', `', `DISTRIBUTED BY (id)')""" |
| .format(output_table=output_table, temp_table=temp_table)) |
| $$ LANGUAGE 'plpythonu' |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__svm_target_cl_func(ind float8[]) RETURNS float8 AS $$ |
| BEGIN |
| IF (ind[1] > 0 AND ind[2] < 0) THEN RETURN 1; END IF; |
| RETURN -1; |
| END |
| $$ LANGUAGE plpgsql |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svm_generate_nd_data(output_table text, num int, dim int) RETURNS VOID AS $$ |
| plpy.execute("drop table if exists " + output_table); |
| plpy.execute("create table " + output_table + " ( id int, ind float8[] ) m4_ifdef(`__POSTGRESQL__', `', `distributed by (id)')") |
| plpy.execute("INSERT INTO " + output_table + " SELECT a.val, MADLIB_SCHEMA.__svm_random_ind2(" + str(dim) + ") FROM (SELECT generate_series(1," + str(num) + ") AS val) AS a") |
| $$ LANGUAGE 'plpythonu' |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| |
| /** |
| * @brief Normalizes the data stored in a table, and save the normalized data in a new table. |
| * |
| * @param input_table Name of table/view containing the data points to be scored |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.svm_data_normalization(input_table TEXT) RETURNS VOID AS $$ |
| output_table = input_table + "_scaled" |
| plpy.execute("DROP TABLE IF EXISTS " + output_table) |
| plpy.execute("CREATE TABLE " + output_table + " ( id int, ind float8[], label int ) m4_ifdef(`__POSTGRESQL__', `', `distributed by (id)')") |
| plpy.execute("INSERT INTO " + output_table + " SELECT id, MADLIB_SCHEMA.svm_normalization(ind), label FROM " + input_table) |
| plpy.info("output table: %s" % output_table) |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |