| /* ----------------------------------------------------------------------- *//** |
| * |
| * @file elastic_net.sql_in |
| * |
| * @brief SQL functions for elastic net regularization |
| * @date July 2012 |
| * |
| * @sa For a brief introduction to elastic net, see the module |
| * description \ref grp_elasticnet. |
| * |
| *//* ----------------------------------------------------------------------- */ |
| |
| m4_include(`SQLCommon.m4') |
| |
| /** |
| @addtogroup grp_elasticnet |
| |
| <div class="toc"><b>Contents</b><ul> |
| <li class="level1"><a href="#train">Training Function</a></li> |
| <li class="level1"><a href="#optimizer">Optimizer Parameters</a></li> |
| <li class="level1"><a href="#predict">Prediction Functions</a></li> |
| <li class="level1"><a href="#examples">Examples</a></li> |
| <li class="level1"><a href="#background">Technical Background</a></li> |
| <li class="level1"><a href="#literature">Literature</a></li> |
| <li class="level1"><a href="#related">Related Topics</a></li> |
| </ul></div> |
| |
| @brief Generates a regularized regression model for variable selection in linear |
| and logistic regression problems, combining the L1 and L2 penalties of the |
| lasso and ridge methods. |
| |
| This module implements elastic net regularization [1] for linear and logistic regression. |
| Regularization is a technique often used to prevent overfitting. |
| |
| @anchor train |
| @par Training Function |
| The training function has the following syntax: |
| <pre class="syntax"> |
| elastic_net_train( tbl_source, |
| tbl_result, |
| col_dep_var, |
| col_ind_var, |
| regress_family, |
| alpha, |
| lambda_value, |
| standardize, |
| grouping_col, |
| optimizer, |
| optimizer_params, |
| excluded, |
| max_iter, |
| tolerance |
| ) |
| </pre> |
| |
| \b Arguments |
| <DL class="arglist"> |
| <DT>tbl_source</DT> |
| <DD>TEXT. The name of the table containing the training data.</DD> |
| |
| <DT>tbl_result</DT> |
| <DD>TEXT. Name of the output table containing output model. |
| The output table produced by the elastic_net_train() function has the following columns: |
| <table class="output"> |
| <tr><th>regress_family</th> |
| <td>The regression type: 'gaussian' or 'binomial'.</td> |
| </tr> |
| <tr> |
| <th>features</th> |
| <td>Array of features (independent variables) passed to the algorithm.</td> |
| </tr> |
| <tr> |
| <th>features_selected</th> |
| <td>Array of features selected by the algorithm.</td> |
| </tr> |
| <tr> |
| <th>coef_nonzero</th> |
| <td>Coefficients of the selected features.</td> |
| </tr> |
| <tr> |
| <th>coef_all</th> |
| <td>Coefficients of all features, both selected and unselected.</td> |
| </tr> |
| <tr> |
| <th>intercept</th> |
| <td>Intercept for the model.</td> |
| </tr> |
| <tr> |
| <th>log_likelihood</th> |
| <td>Log of the likelihood value produced by the algorithm.</td> |
| </tr> |
| <tr> |
| <th>standardize</th> |
| <td>BOOLEAN. If data has been normalized, will be set to TRUE.</td> |
| </tr> |
| <tr> |
| <th>iteration_run</th> |
| <td>The number of iterations executed.</td> |
| </tr> |
| </table> |
| </DD> |
| |
| <DT>col_dep_var</DT> |
| <DD>TEXT. An expression for the dependent variable. |
| |
| @note Both \e col_dep_var and \e col_ind_var can be valid PostgreSQL |
| expressions. For example, <tt>col_dep_var = 'log(y+1)'</tt>, and <tt>col_ind_var |
| = 'array[exp(x[1]), x[2], 1/(1+x[3])]'</tt>. In the binomial case, you can |
| use a Boolean expression, for example, <tt>col_dep_var = 'y < 0'</tt>.</DD> |
| |
| <DT>col_ind_var</DT> |
| <DD>TEXT. An expression for the independent variables. Use \c '*' to |
| specify all columns of <em>tbl_source</em> except those listed in the |
| <em>excluded</em> string described below. If \e col_dep_var is a column name, it is |
| automatically excluded from the independent variables. However, if |
| \e col_dep_var is a valid PostgreSQL expression, any column names used |
| within the expression are only excluded if they are explicitly listed in the |
| \e excluded argument. Therefore, it is a good idea to add all column names involved in |
| the dependent variable expression to the <em>excluded</em> string.</DD> |
| |
| <DT>regress_family</DT> |
| <DD>TEXT. For regression type, specify either 'gaussian' ('linear') or 'binomial' ('logistic').</DD> |
| |
| <DT>alpha</DT> |
| <DD>FLOAT8. Elastic net control parameter with a value in the range [0, 1]. |
| A value of 1 means L1 regularization, and a value of 0 means L2 regularization.</DD> |
| |
| <DT>lambda_value</DT> |
| <DD>FLOAT8. Regularization parameter (must be positive).</DD> |
| |
| <DT>standardize (optional)</DT> |
| <DD>BOOLEAN, default: TRUE. Whether to normalize the data or not. |
| Setting to TRUE usually yields better results and faster convergence.</DD> |
| |
| <DT>grouping_col (optional)</DT> |
| <DD>TEXT, default: NULL. A single column or a list of comma-separated |
| columns that divides the input data into discrete groups, resulting in one |
| regression per group. When this value is NULL, no grouping is used and |
| a single model is generated for all data. |
| |
| @note Expressions are not currently supported for 'grouping_col'. |
| |
| <DT>optimizer (optional)</DT> |
| <DD>TEXT, default: 'fista'. Name of optimizer, either 'fista' or 'igd'. |
| FISTA [2] is an algorithm with a fast global rate of convergence for |
| solving linear inverse problems. Incremental gradient descent (IGD) |
| is a stochastic approach to minimizing an objective function [4].</DD> |
| |
| <DT>optimizer_params (optional)</DT> |
| <DD>TEXT, default: NULL. Optimizer parameters, delimited with commas. |
| These parameters differ depending on the value of \e optimizer parameter. |
| See the descriptions below for details.</DD> |
| |
| <DT>excluded (optional)</DT> |
| <DD>TEXT, default: NULL. If the \e col_ind_var input is '*' then \e excluded can |
| be provided as a comma-delimited list of column names that are to be excluded |
| from the features. |
| For example, <tt>'col1, col2'</tt>. If the \e col_ind_var is an array, |
| \e excluded must be a list of the integer array positions to exclude, |
| for example <tt>'1,2'</tt>. If this argument is NULL or an |
| empty string, no columns are excluded.</DD> |
| |
| <DT>max_iter (optional)</DT> |
| <DD>INTEGER, default: 1000. The maximum number of iterations allowed.</DD> |
| |
| <DT>tolerance (optional)</DT> |
| <DD>FLOAT8, default: 1e-6. This is the criterion to stop iterating. Both the |
| 'fista' and 'igd' optimizers compute the difference between the |
| log likelihood of two consecutive iterations, and when the difference is smaller |
| than \e tolerance or the iteration number is larger than \e max_iter, the |
| computation stops.</DD> |
| </DL> |
| |
| @anchor optimizer |
| @par Other Parameters |
| |
| For \e optimizer_params, there are several |
| parameters that can be supplied in a string containing a |
| comma-delimited list of name-value pairs . All of these named parameters are |
| optional and use the format "<param_name> = <value>". |
| |
| The parameters described below are organized by category: warmup, cross validation and |
| optimization. |
| |
| <em><b>Warmup parameters</b></em> |
| <pre class="syntax"> |
| $$ |
| warmup = <value>, |
| warmup_lambdas = <value>, |
| warmup_lambda_no = <value>, |
| warmup_tolerance = <value> |
| $$ |
| </pre> |
| |
| <DL class="arglist"> |
| <DT>warmup</DT> |
| <DD>Default: FALSE. If \e warmup is TRUE, a series of strictly descending lambda values |
| are used, which end with the lambda value that the user wants to calculate. |
| A larger lambda gives a sparser solution, and the sparse |
| solution is then used as the initial guess for the next lambda's solution, |
| which can speed up the computation for the next lambda. For larger data sets, |
| this can sometimes accelerate the whole computation and may in fact be faster than |
| computation with only a single lambda value.</DD> |
| |
| <DT>warmup_lambdas</DT> |
| <DD>Default: NULL. Set of lambda values to use when \e warmup is TRUE. |
| The default is NULL, which means that lambda values will be automatically generated.</DD> |
| |
| <DT>warmup_lambda_no</DT> |
| <DD>Default: 15. Number of lambda values used in \e warm-up. |
| If \e warmup_lambdas is not NULL, this value is overridden by the number of provided lambda values.</DD> |
| |
| <DT>warmup_tolerance</DT> |
| <DD>The value of tolerance used during warmup. The default value is the same as the |
| \e tolerance argument described above.</DD> |
| </DL> |
| |
| <em><b>Cross validation parameters</b></em> |
| @note Please note that for performance reasons, warmup is disabled whenever |
| cross validation is used. Also, cross validation is not supported if grouping is used. |
| |
| <pre class="syntax"> |
| $$ |
| n_folds = <value>, |
| validation_result = <value>, |
| lambda_value = <value>, |
| n_lambdas = <value>, |
| alpha = <value> |
| $$ |
| </pre> |
| |
| Hyperparameter optimization can be carried out using the built-in cross |
| validation mechanism, which is activated by assigning a value greater than 1 to |
| the parameter \e n_folds. |
| |
| The cross validation scores are the mean and standard deviation |
| of the accuracy when predicted on the validation fold, |
| averaged over all folds and all rows. For classification, the accuracy |
| metric used is the ratio of correct classifications. For regression, the |
| accuracy metric used is the negative of mean squared error (negative to |
| make it a concave problem, thus selecting \e max means the highest accuracy). |
| Cross validation scores are written out to a separate table with the |
| user specified name given in the 'validation_result' parameter. |
| |
| The values of a parameter to cross validate should be provided in a list. For |
| example, to regularize with the L1 norm and use a lambda value |
| from the set {0.3, 0.4, 0.5}, include 'lambda_value={0.3, 0.4, 0.5}'. |
| Note that the use of '{}' and '[]' are both valid here. |
| |
| <DL class="arglist"> |
| |
| <DT>n_folds</dt> |
| <DD>Default: 0. |
| Number of folds (k). Must be at least 2 to activate cross validation. |
| If a value of k > 2 is specified, each fold is then used as a validation set once, |
| while the other k - 1 folds form the training set. |
| </DD> |
| |
| <DT>validation_result</dt> |
| <DD>Default: NULL. |
| Name of the table to store the cross validation results, including the values of |
| parameters and their averaged error values. The table is only created if the name is not NULL. |
| </DD> |
| |
| <DT>lambda_value</DT> |
| <DD>Default: NULL. Set of regularization values to be used for cross validation. |
| The default is NULL, which means that lambda values will be automatically generated.</DD> |
| |
| <DT>n_lambdas</DT> |
| <DD>Default: 15. Number of lambdas to cross validate over. If a list of lambda values is not |
| provided in the \e lambda_value set above, this parameter can be used to |
| autogenerate the set of lambdas. If the \e lambda_value set is not NULL, this value |
| is overridden by the number of provided lambda values. </DD> |
| |
| @note If you want to cross validate over alpha only and not lambda, |
| then set \e lambda_value to NULL and \e n_lambdas to 0. In this case, |
| cross validation will be done on the set of \e alpha values specified |
| in the next parameter. The lambda value used will be the one |
| specified in the main function call at the top of this page. |
| |
| <DT>alpha</DT> |
| <DD>Elastic net control parameter. This is a list of values to apply |
| cross validation on. (Note that alpha values are not autogenerated.) |
| If not specified, the alpha value used will be the one |
| specified in the main function call at the top of this page. |
| </DD> |
| </DL> |
| |
| <em><b>Optimizer parameters</b></em> |
| |
| \b FISTA Parameters |
| <pre class="syntax"> |
| $$ |
| max_stepsize = <value>, |
| eta = <value>, |
| use_active_set = <value>, |
| activeset_tolerance = <value>, |
| random_stepsize = <value> |
| $$ |
| </pre> |
| |
| <DL class="arglist"> |
| <DT>max_stepsize</dt> |
| <DD>Default: 4.0. Initial backtracking step size. At each iteration, the algorithm first tries |
| <em>stepsize = max_stepsize</em>, and if it does not work out, it then tries a |
| smaller step size, <em>stepsize = stepsize/eta</em>, where \e eta must |
| be larger than 1. At first glance, this seems to perform repeated iterations for even one step, but using a larger step size actually greatly increases the computation speed and minimizes the total number of iterations. A careful choice of \e max_stepsize can decrease the computation time by more than 10 times.</DD> |
| |
| <DT>eta</DT> |
| <DD>Default: 2.0 If stepsize does not work, \e stepsize/\e eta is tried. Must be greater than 1. </DD> |
| |
| <DT>use_active_set</DT> |
| <DD>Default: FALSE. If \e use_active_set is TRUE, an active-set method is used to |
| speed up the computation. Considerable speedup is obtained by organizing the |
| iterations around the active set of features—those with nonzero coefficients. |
| After a complete cycle through all the variables, we iterate only on the active |
| set until convergence. If another complete cycle does not change the active set, |
| we are done. Otherwise, the process is repeated.</DD> |
| |
| <DT>activeset_tolerance</DT> |
| <DD>The value of tolerance used during active set calculation. The default |
| value is the same as the \e tolerance argument described above. </DD> |
| |
| <DT>random_stepsize</DT> |
| <DD>Default: FALSE. Whether to add some randomness to the step size. Sometimes, this can speed |
| up the calculation.</DD> |
| </DL> |
| |
| \b IGD parameters |
| <pre class="syntax"> |
| $$ |
| stepsize = <value>, |
| step_decay = <value>, |
| threshold = <value>, |
| parallel = <value> |
| $$ |
| </pre> |
| <DL class="arglist"> |
| |
| <DT>stepsize</DT> |
| <DD>The default is 0.01.</DD> |
| |
| <DT>step_decay</DT> |
| <DD>The actual stepsize used for current step is (previous stepsize) / exp(step_decay). The default value is 0, which means that a constant stepsize is used in IGD.</DD> |
| |
| <DT>threshold</DT> |
| <DD>Default: 1e-10. When a coefficient is really small, set this coefficient to be 0. |
| |
| Due to the stochastic nature of SGD, we can only obtain very small values for |
| the fitting coefficients. Therefore, \e threshold is needed at the end of |
| the computation to screen out tiny values and hard-set them to |
| zeros. This is accomplished as follows: (1) multiply each coefficient with the |
| standard deviation of the corresponding feature; (2) compute the average of |
| absolute values of re-scaled coefficients; (3) divide each rescaled coefficient |
| with the average, and if the resulting absolute value is smaller than |
| \e threshold, set the original coefficient to zero.</DD> |
| |
| <DT>parallel</DT> |
| <DD>Whether to run the computation on multiple segments. The default is TRUE. |
| |
| SGD is a sequential algorithm in nature. When running in a distributed |
| manner, each segment of the data runs its own SGD model and then the models |
| are averaged to get a model for each iteration. This averaging might slow |
| down the convergence speed, but it affords the ability to process |
| large datasets on a cluster of machines. This algorithm, therefore, provides the |
| \e parallel option to allow you to choose whether to do parallel computation. |
| </DD> |
| </DL> |
| |
| |
| @anchor predict |
| @par Prediction Function |
| |
| <h4>Per-Tuple Prediction</h4> |
| The prediction function returns a double value for the Gaussian family |
| and a Boolean value for the Binomial family. |
| |
| The predict function has the following syntax (elastic_net_gaussian_predict() and elastic_net_binomial_predict()): |
| <pre class="syntax"> |
| elastic_net_<family>_predict( |
| coefficients, |
| intercept, |
| ind_var |
| ) |
| </pre> |
| |
| \b Arguments |
| <DL class="arglist"> |
| <DT>coefficients</DT> |
| <DD>DOUBLE PRECISION[]. Fitting coefficients, usually \e coef_all or \e coef_nonzero.</DD> |
| <DT>intercept</DT> |
| <DD>DOUBLE PRECISION. Intercept for the model.</DD> |
| <DT>ind_var</DT> |
| <DD>DOUBLE PRECISION[]. Independent variables that correspond to coefficients. Use <EM>features</EM> column in <EM>tbl_result</EM> for \e coef_all, and <EM>features_selected</EM> for \e coef_nonzero. See the <a href="#additional_example">examples for this case below</a>. |
| @note Unexpected results or errors may be returned in the case that this argument \e ind_var is not specified properly.</DD> |
| </DL> |
| |
| For the binomial family, there is a function (elastic_net_binomial_prob()) that outputs the probability of the instance being TRUE: |
| <pre class="syntax"> |
| elastic_net_binomial_prob( |
| coefficients, |
| intercept, |
| ind_var |
| ) |
| </pre> |
| |
| <h4>Per-Table Prediction</h4> |
| Alternatively, you can use another prediction function that stores the prediction result in a table (elastic_net_predict()). |
| This is useful if you want to use elastic net together with the |
| general cross validation function. |
| <pre class="syntax"> |
| elastic_net_predict( tbl_model, |
| tbl_new_sourcedata, |
| col_id, |
| tbl_predict |
| ) |
| </pre> |
| \b Arguments |
| <dl class="arglist"> |
| <dt>tbl_model</dt> |
| <dd>TEXT. Name of the table containing the output from the training function.</dd> |
| <dt>tbl_new_sourcedata</dt> |
| <dd>TEXT. Name of the table containing the new source data.</dd> |
| <dt>col_id</dt> |
| <dd>TEXT. Unique ID associated with each row.</dd> |
| <dt>tbl_predict</dt> |
| <dd>TEXT. Name of table to store the prediction result. </dd> |
| </dl> |
| You do not need to specify whether the model is "linear" or "logistic" because this information is already included in the \e tbl_model table. |
| |
| @anchor examples |
| @examp |
| |
| -# Display online help for the elastic_net_train() function: |
| <pre class="example"> |
| SELECT madlib.elastic_net_train(); |
| </pre> |
| |
| -# Create an input data set of house prices and features: |
| <pre class="example"> |
| DROP TABLE IF EXISTS houses; |
| CREATE TABLE houses ( id INT, |
| tax INT, |
| bedroom INT, |
| bath FLOAT, |
| price INT, |
| size INT, |
| lot INT, |
| zipcode INT); |
| INSERT INTO houses (id, tax, bedroom, bath, price, size, lot, zipcode) VALUES |
| (1 , 590 , 2 , 1 , 50000 , 770 , 22100 , 94301), |
| (2 , 1050 , 3 , 2 , 85000 , 1410 , 12000 , 94301), |
| (3 , 20 , 3 , 1 , 22500 , 1060 , 3500 , 94301), |
| (4 , 870 , 2 , 2 , 90000 , 1300 , 17500 , 94301), |
| (5 , 1320 , 3 , 2 , 133000 , 1500 , 30000 , 94301), |
| (6 , 1350 , 2 , 1 , 90500 , 820 , 25700 , 94301), |
| (7 , 2790 , 3 , 2.5 , 260000 , 2130 , 25000 , 94301), |
| (8 , 680 , 2 , 1 , 142500 , 1170 , 22000 , 94301), |
| (9 , 1840 , 3 , 2 , 160000 , 1500 , 19000 , 94301), |
| (10 , 3680 , 4 , 2 , 240000 , 2790 , 20000 , 94301), |
| (11 , 1660 , 3 , 1 , 87000 , 1030 , 17500 , 94301), |
| (12 , 1620 , 3 , 2 , 118600 , 1250 , 20000 , 94301), |
| (13 , 3100 , 3 , 2 , 140000 , 1760 , 38000 , 94301), |
| (14 , 2070 , 2 , 3 , 148000 , 1550 , 14000 , 94301), |
| (15 , 650 , 3 , 1.5 , 65000 , 1450 , 12000 , 94301), |
| (16 , 770 , 2 , 2 , 91000 , 1300 , 17500 , 76010), |
| (17 , 1220 , 3 , 2 , 132300 , 1500 , 30000 , 76010), |
| (18 , 1150 , 2 , 1 , 91100 , 820 , 25700 , 76010), |
| (19 , 2690 , 3 , 2.5 , 260011 , 2130 , 25000 , 76010), |
| (20 , 780 , 2 , 1 , 141800 , 1170 , 22000 , 76010), |
| (21 , 1910 , 3 , 2 , 160900 , 1500 , 19000 , 76010), |
| (22 , 3600 , 4 , 2 , 239000 , 2790 , 20000 , 76010), |
| (23 , 1600 , 3 , 1 , 81010 , 1030 , 17500 , 76010), |
| (24 , 1590 , 3 , 2 , 117910 , 1250 , 20000 , 76010), |
| (25 , 3200 , 3 , 2 , 141100 , 1760 , 38000 , 76010), |
| (26 , 2270 , 2 , 3 , 148011 , 1550 , 14000 , 76010), |
| (27 , 750 , 3 , 1.5 , 66000 , 1450 , 12000 , 76010); |
| </pre> |
| -# Train the model: |
| <pre class="example"> |
| DROP TABLE IF EXISTS houses_en, houses_en_summary; |
| SELECT madlib.elastic_net_train( 'houses', -- Source table |
| 'houses_en', -- Result table |
| 'price', -- Dependent variable |
| 'array[tax, bath, size]', -- Independent variable |
| 'gaussian', -- Regression family |
| 0.5, -- Alpha value |
| 0.1, -- Lambda value |
| TRUE, -- Standardize |
| NULL, -- Grouping column(s) |
| 'fista', -- Optimizer |
| '', -- Optimizer parameters |
| NULL, -- Excluded columns |
| 10000, -- Maximum iterations |
| 1e-6 -- Tolerance value |
| ); |
| </pre> |
| -# View the resulting model: |
| <pre class="example"> |
| -- Turn on expanded display to make it easier to read results. |
| \\x on |
| SELECT * FROM houses_en; |
| </pre> |
| Result: |
| <pre class="result"> |
| -[ RECORD 1 ]-----+------------------------------------------- |
| family | gaussian |
| features | {tax,bath,size} |
| features_selected | {tax,bath,size} |
| coef_nonzero | {22.785201806,10707.9664343,54.7959774173} |
| coef_all | {22.785201806,10707.9664343,54.7959774173} |
| intercept | -7798.71393905 |
| log_likelihood | -512248641.971 |
| standardize | t |
| iteration_run | 10000 |
| </pre> |
| -# Use the prediction function to evaluate residuals: |
| <pre class="example"> |
| \\x off |
| SELECT id, price, predict, price - predict AS residual |
| FROM ( |
| SELECT |
| houses.*, |
| madlib.elastic_net_gaussian_predict( |
| m.coef_all, -- Coefficients |
| m.intercept, -- Intercept |
| ARRAY[tax,bath,size] -- Features (corresponding to coefficients) |
| ) AS predict |
| FROM houses, houses_en m) s |
| ORDER BY id; |
| </pre> |
| Result: |
| <pre class="result"> |
| id | price | predict | residual |
| ----+--------+------------------+------------------- |
| 1 | 50000 | 58545.391894031 | -8545.391894031 |
| 2 | 85000 | 114804.077663003 | -29804.077663003 |
| 3 | 22500 | 61448.835664388 | -38948.835664388 |
| 4 | 90000 | 104675.17768007 | -14675.17768007 |
| 5 | 133000 | 125887.70644358 | 7112.29355642 |
| 6 | 90500 | 78601.843595366 | 11898.156404634 |
| 7 | 260000 | 199257.358231079 | 60742.641768921 |
| 8 | 142500 | 82514.559377081 | 59985.440622919 |
| 9 | 160000 | 137735.93215082 | 22264.06784918 |
| 10 | 240000 | 250347.627648647 | -10347.627648647 |
| 11 | 87000 | 97172.428263539 | -10172.428263539 |
| 12 | 118600 | 119024.150628605 | -424.150628604999 |
| 13 | 140000 | 180692.127913358 | -40692.127913358 |
| 14 | 148000 | 156424.249824545 | -8424.249824545 |
| 15 | 65000 | 102527.938104575 | -37527.938104575 |
| 16 | 91000 | 102396.67273637 | -11396.67273637 |
| 17 | 132300 | 123609.20149988 | 8690.79850012 |
| 18 | 91100 | 74044.833707966 | 17055.166292034 |
| 19 | 260011 | 196978.853287379 | 63032.146712621 |
| 20 | 141800 | 84793.064320781 | 57006.935679219 |
| 21 | 160900 | 139330.88561141 | 21569.11438859 |
| 22 | 239000 | 248524.823693687 | -9524.82369368701 |
| 23 | 81010 | 95805.325297319 | -14795.325297319 |
| 24 | 117910 | 118340.599145495 | -430.599145494998 |
| 25 | 141100 | 182970.632857058 | -41870.632857058 |
| 26 | 148011 | 160981.259711945 | -12970.259711945 |
| 27 | 66000 | 104806.443048275 | -38806.443048275 |
| </pre> |
| |
| <h4>Example with Grouping</h4> |
| -# Reuse the houses table above and train the model by grouping |
| on zip code: |
| <pre class="example"> |
| DROP TABLE IF EXISTS houses_en1, houses_en1_summary; |
| SELECT madlib.elastic_net_train( 'houses', -- Source table |
| 'houses_en1', -- Result table |
| 'price', -- Dependent variable |
| 'array[tax, bath, size]', -- Independent variable |
| 'gaussian', -- Regression family |
| 0.5, -- Alpha value |
| 0.1, -- Lambda value |
| TRUE, -- Standardize |
| 'zipcode', -- Grouping column(s) |
| 'fista', -- Optimizer |
| '', -- Optimizer parameters |
| NULL, -- Excluded columns |
| 10000, -- Maximum iterations |
| 1e-6 -- Tolerance value |
| ); |
| </pre> |
| -# View the resulting model with a separate model for each group: |
| <pre class="example"> |
| -- Turn on expanded display to make it easier to read results. |
| \\x on |
| SELECT * FROM houses_en1; |
| </pre> |
| Result: |
| <pre class="result"> |
| -[ RECORD 1 ]-----+-------------------------------------------- |
| zipcode | 94301 |
| family | gaussian |
| features | {tax,bath,size} |
| features_selected | {tax,bath,size} |
| coef_nonzero | {27.0542096962,12351.5244083,47.5833289771} |
| coef_all | {27.0542096962,12351.5244083,47.5833289771} |
| intercept | -7191.19791597 |
| log_likelihood | -519199964.967 |
| standardize | t |
| iteration_run | 10000 |
| -[ RECORD 2 ]-----+-------------------------------------------- |
| zipcode | 76010 |
| family | gaussian |
| features | {tax,bath,size} |
| features_selected | {tax,bath,size} |
| coef_nonzero | {15.6325953499,10166.6608469,57.8689916035} |
| coef_all | {15.6325953499,10166.6608469,57.8689916035} |
| intercept | 513.912201627 |
| log_likelihood | -538806528.45 |
| standardize | t |
| iteration_run | 10000 |
| </pre> |
| -# Use the prediction function to evaluate residuals: |
| <pre class="example"> |
| \\x off |
| SELECT madlib.elastic_net_predict( |
| 'houses_en1', -- Model table |
| 'houses', -- New source data table |
| 'id', -- Unique ID associated with each row |
| 'houses_en1_prediction' -- Table to store prediction result |
| ); |
| SELECT houses.id, |
| houses.price, |
| houses_en1_prediction.prediction, |
| houses.price - houses_en1_prediction.prediction AS residual |
| FROM houses_en1_prediction, houses |
| WHERE houses.id = houses_en1_prediction.id ORDER BY id; |
| </pre> |
| |
| @anchor additional_example |
| <h4>Example where coef_nonzero is different from coef_all</h4> |
| -# Reuse the <a href="#examples">houses</a> table above and train the model with alpha=1 (L1) |
| and a large lambda value (30000). |
| <pre class="example"> |
| DROP TABLE IF EXISTS houses_en2, houses_en2_summary; |
| SELECT madlib.elastic_net_train( 'houses', -- Source table |
| 'houses_en2', -- Result table |
| 'price', -- Dependent variable |
| 'array[tax, bath, size]', -- Independent variable |
| 'gaussian', -- Regression family |
| 1, -- Alpha value |
| 30000, -- Lambda value |
| TRUE, -- Standardize |
| NULL, -- Grouping column(s) |
| 'fista', -- Optimizer |
| '', -- Optimizer parameters |
| NULL, -- Excluded columns |
| 10000, -- Maximum iterations |
| 1e-6 -- Tolerance value |
| ); |
| </pre> |
| -# View the resulting model and see coef_nonzero is different from coef_all: |
| <pre class="example"> |
| -- Turn on expanded display to make it easier to read results. |
| \\x on |
| SELECT * FROM houses_en2; |
| </pre> |
| Result: |
| <pre class="result"> |
| -[ RECORD 1 ]-----+-------------------------------- |
| family | gaussian |
| features | {tax,bath,size} |
| features_selected | {tax,size} |
| coef_nonzero | {6.94744249834,29.7137297658} |
| coef_all | {6.94744249834,0,29.7137297658} |
| intercept | 74445.7039382 |
| log_likelihood | -1635348585.07 |
| standardize | t |
| iteration_run | 151 |
| </pre> |
| -# We can still use the prediction function with \e coef_all to evaluate residuals: |
| <pre class="example"> |
| \\x off |
| SELECT id, price, predict, price - predict AS residual |
| FROM ( |
| SELECT |
| houses.*, |
| madlib.elastic_net_gaussian_predict( |
| m.coef_all, -- All coefficients |
| m.intercept, -- Intercept |
| ARRAY[tax,bath,size] -- All features |
| ) AS predict |
| FROM houses, houses_en2 m) s |
| ORDER BY id; |
| </pre> |
| -# We can speed up the prediction function with \e coef_nonzero |
| to evaluate residuals. This requires the user to examine the |
| \e feature_selected column in the result table to construct the correct |
| set of independent variables to provide to the prediction function: |
| <pre class="example"> |
| \\x off |
| SELECT id, price, predict, price - predict AS residual |
| FROM ( |
| SELECT |
| houses.*, |
| madlib.elastic_net_gaussian_predict( |
| m.coef_nonzero, -- Non-zero coefficients |
| m.intercept, -- Intercept |
| ARRAY[tax,size] -- Features corresponding to non-zero coefficients |
| ) AS predict |
| FROM houses, houses_en2 m) s |
| ORDER BY id; |
| </pre> |
| The two queries above will result in same residuals: |
| <pre class="result"> |
| id | price | predict | residual |
| ----+--------+------------------+------------------- |
| 1 | 50000 | 101424.266931887 | -51424.2669318866 |
| 2 | 85000 | 123636.877531235 | -38636.877531235 |
| 3 | 22500 | 106081.206339915 | -83581.2063399148 |
| 4 | 90000 | 119117.827607296 | -29117.8276072958 |
| 5 | 133000 | 128186.922684709 | 4813.0773152912 |
| 6 | 90500 | 108190.009718915 | -17690.009718915 |
| 7 | 260000 | 157119.312909723 | 102880.687090277 |
| 8 | 142500 | 113935.028663057 | 28564.9713369428 |
| 9 | 160000 | 131799.592783846 | 28200.4072161544 |
| 10 | 240000 | 182913.598378673 | 57086.4016213268 |
| 11 | 87000 | 116583.600144218 | -29583.6001442184 |
| 12 | 118600 | 122842.722992761 | -4242.7229927608 |
| 13 | 140000 | 148278.940070862 | -8278.94007086201 |
| 14 | 148000 | 134883.191046754 | 13116.8089532462 |
| 15 | 65000 | 122046.449722531 | -57046.449722531 |
| 16 | 91000 | 118423.083357462 | -27423.0833574618 |
| 17 | 132300 | 127492.178434875 | 4807.8215651252 |
| 18 | 91100 | 106800.521219247 | -15700.521219247 |
| 19 | 260011 | 156424.568659889 | 103586.431340111 |
| 20 | 141800 | 114629.772912891 | 27170.2270871088 |
| 21 | 160900 | 132285.913758729 | 28614.0862412706 |
| 22 | 239000 | 182357.802978806 | 56642.197021194 |
| 23 | 81010 | 116166.753594318 | -35156.753594318 |
| 24 | 117910 | 122634.299717811 | -4724.29971781059 |
| 25 | 141100 | 148973.684320696 | -7873.68432069599 |
| 26 | 148011 | 136272.679546422 | 11738.3204535782 |
| 27 | 66000 | 122741.193972365 | -56741.193972365 |
| (27 rows) |
| </pre> |
| |
| <h4>Example with Cross Validation</h4> |
| -# Reuse the houses table above. |
| Here we use 3-fold cross validation with 3 automatically generated |
| lambda values and 3 specified alpha values. (This can take some time to |
| run since elastic net is effectively being called 27 times for |
| these combinations, then a 28th time for the whole dataset.) |
| <pre class="example"> |
| DROP TABLE IF EXISTS houses_en3, houses_en3_summary, houses_en3_cv; |
| SELECT madlib.elastic_net_train( 'houses', -- Source table |
| 'houses_en3', -- Result table |
| 'price', -- Dependent variable |
| 'array[tax, bath, size]', -- Independent variable |
| 'gaussian', -- Regression family |
| 0.5, -- Alpha value |
| 0.1, -- Lambda value |
| TRUE, -- Standardize |
| NULL, -- Grouping column(s) |
| 'fista', -- Optimizer |
| $$ n_folds = 3, -- Cross validation parameters |
| validation_result=houses_en3_cv, |
| n_lambdas = 3, |
| alpha = {0, 0.1, 1} |
| $$, |
| NULL, -- Excluded columns |
| 10000, -- Maximum iterations |
| 1e-6 -- Tolerance value |
| ); |
| SELECT * FROM houses_en3; |
| </pre> |
| <pre class="result"> |
| -[ RECORD 1 ]-----+-------------------------------------------- |
| family | gaussian |
| features | {tax,bath,size} |
| features_selected | {tax,bath,size} |
| coef_nonzero | {22.4584188479,11657.0739045,52.1624090811} |
| coef_all | {22.4584188479,11657.0739045,52.1624090811} |
| intercept | -5067.33396522 |
| log_likelihood | -543193170.15 |
| standardize | t |
| iteration_run | 10000 |
| </pre> |
| |
| -# Details of the cross validation: |
| <pre class="example"> |
| SELECT * FROM houses_en3_cv ORDER BY mean_neg_loss DESC; |
| </pre> |
| <pre class="result"> |
| alpha | lambda_value | mean_neg_loss | std_neg_loss |
| -------+--------------+------------------------------------------+ |
| 0.0 | 0.1 | -36094.4685768 | 10524.4473253 |
| 0.1 | 0.1 | -36136.2448004 | 10682.4136993 |
| 1.0 | 100.0 | -37007.9496501 | 12679.3781975 |
| 1.0 | 0.1 | -37018.1019927 | 12716.7438015 |
| 0.1 | 100.0 | -59275.6940173 | 9764.50064237 |
| 0.0 | 100.0 | -59380.252681 | 9763.26373034 |
| 1.0 | 100000.0 | -60353.0220769 | 9748.10305107 |
| 0.1 | 100000.0 | {large neg number} | {large pos number} |
| 0.0 | 100000.0 | {large neg number} | {large pos number} |
| (9 rows) |
| </pre> |
| |
| @anchor notes |
| @par Note |
| It is \b strongly \b recommended that you run |
| \c elastic_net_train() on a subset of the data with a limited |
| \e max_iter before applying it to the full data set with a large |
| \e max_iter. In the pre-run, you can adjust the parameters to get the |
| best performance and then apply the best set of parameters to the whole data |
| set. |
| |
| |
| @anchor background |
| @par Technical Background |
| |
| Elastic net regularization seeks to find a weight vector that, for any given training example set, minimizes: |
| \f[\min_{w \in R^N} L(w) + \lambda \left(\frac{(1-\alpha)}{2} \|w\|_2^2 + \alpha \|w\|_1 \right)\f] |
| where \f$L\f$ is the metric function that the user wants to minimize. Here \f$ \alpha \in [0,1] \f$ |
| and \f$ lambda \geq 0 \f$. If \f$alpha = 0\f$, we have the ridge regularization (known also as Tikhonov regularization), and if \f$\alpha = 1\f$, we have the LASSO regularization. |
| |
| For the Gaussian response family (or linear model), we have |
| \f[L(\vec{w}) = \frac{1}{2}\left[\frac{1}{M} \sum_{m=1}^M (w^{t} x_m + w_{0} - y_m)^2 \right] |
| \f] |
| |
| For the Binomial response family (or logistic model), we have |
| \f[ |
| L(\vec{w}) = \sum_{m=1}^M\left[y_m \log\left(1 + e^{-(w_0 + |
| \vec{w}\cdot\vec{x}_m)}\right) + (1-y_m) \log\left(1 + e^{w_0 + |
| \vec{w}\cdot\vec{x}_m}\right)\right]\ , |
| \f] |
| where \f$y_m \in {0,1}\f$. |
| |
| To get better convergence, one can rescale the value of each element of x |
| \f[ x' \leftarrow \frac{x - \bar{x}}{\sigma_x} \f] |
| and for Gaussian case we also let |
| \f[y' \leftarrow y - \bar{y} \f] |
| and then minimize with the regularization terms. |
| At the end of the calculation, the orginal scales will be restored and an |
| intercept term will be obtained at the same time as a by-product. |
| |
| Note that fitting after scaling is not equivalent to directly fitting. |
| |
| @anchor literature |
| @literature |
| |
| [1] Elastic net regularization, http://en.wikipedia.org/wiki/Elastic_net_regularization |
| |
| [2] Beck, A. and M. Teboulle (2009), A fast iterative shrinkage-thresholding algorithm for linear inverse problems. SIAM J. on Imaging Sciences 2(1), 183-202. |
| |
| [3] Shai Shalev-Shwartz and Ambuj Tewari, Stochastic Methods for L1 Regularized Loss Minimization. Proceedings of the 26th International Conference on Machine Learning, Montreal, Canada, 2009. |
| |
| [4] Stochastic gradient descent, https://en.wikipedia.org/wiki/Stochastic_gradient_descent |
| |
| @anchor related |
| @par Related Topics |
| |
| File elastic_net.sql_in documenting the SQL functions. |
| */ |
| |
| ------------------------------------------------------------------------ |
| |
| /** |
| * @brief Interface for elastic net |
| * |
| * @param tbl_source Name of data source table |
| * @param tbl_result Name of the table to store the results |
| * @param col_ind_var Name of independent variable column, independent variable is an array |
| * @param col_dep_var Name of dependent variable column |
| * @param regress_family Response type (gaussian or binomial) |
| * @param alpha The elastic net parameter, [0, 1] |
| * @param lambda_value The regularization parameter |
| * @param standardize Whether to normalize the variables (default True) |
| * @param grouping_col List of columns on which to apply grouping |
| * (currently only a placeholder) |
| * @param optimizer The optimization algorithm, 'fista' or 'igd'. Default is 'fista' |
| * @param optimizer_params Parameters of the above optimizer, |
| * the format is 'arg = value, ...'. Default is NULL |
| * @param excluded Which columns to exclude? Default is NULL |
| * (applicable only if col_ind_var is set as * or a column of array, |
| * column names as 'col1, col2, ...' if col_ind_var is '*'; |
| * element indices as '1,2,3, ...' if col_ind_var is a column of array) |
| * @param max_iter Maximum number of iterations to run the algorithm |
| * (default value of 10000) |
| * @param tolerance Iteration stopping criteria. Default is 1e-6 |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_train ( |
| tbl_source TEXT, |
| tbl_result TEXT, |
| col_dep_var TEXT, |
| col_ind_var TEXT, |
| regress_family TEXT, |
| alpha DOUBLE PRECISION, |
| lambda_value DOUBLE PRECISION, |
| standardize BOOLEAN, |
| grouping_col TEXT, |
| optimizer TEXT, |
| optimizer_params TEXT, |
| excluded TEXT, |
| max_iter INTEGER, |
| tolerance DOUBLE PRECISION |
| ) RETURNS VOID AS $$ |
| PythonFunctionBodyOnly(`elastic_net', `elastic_net') |
| with AOControl(False): |
| return elastic_net.elastic_net_train( |
| schema_madlib, tbl_source, tbl_result, col_dep_var, |
| col_ind_var, regress_family, alpha, lambda_value, |
| standardize, grouping_col, optimizer, optimizer_params, |
| excluded, max_iter, tolerance) |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| ------------------------------------------------------------------------ |
| -- Overloaded functions |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_train ( |
| tbl_source TEXT, |
| tbl_result TEXT, |
| col_ind_var TEXT, |
| col_dep_var TEXT, |
| regress_family TEXT, |
| alpha DOUBLE PRECISION, |
| lambda_value DOUBLE PRECISION, |
| standardization BOOLEAN, |
| grouping_columns TEXT, |
| optimizer TEXT, |
| optimizer_params TEXT, |
| excluded TEXT, |
| max_iter INTEGER |
| ) RETURNS VOID AS $$ |
| BEGIN |
| PERFORM MADLIB_SCHEMA.elastic_net_train($1, $2, $3, $4, $5, $6, $7, $8, |
| $9, $10, $11, $12, $13, 1e-6); |
| END; |
| $$ LANGUAGE plpgsql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_train ( |
| tbl_source TEXT, |
| tbl_result TEXT, |
| col_ind_var TEXT, |
| col_dep_var TEXT, |
| regress_family TEXT, |
| alpha DOUBLE PRECISION, |
| lambda_value DOUBLE PRECISION, |
| standardization BOOLEAN, |
| grouping_columns TEXT, |
| optimizer TEXT, |
| optimizer_params TEXT, |
| excluded TEXT |
| ) RETURNS VOID AS $$ |
| BEGIN |
| PERFORM MADLIB_SCHEMA.elastic_net_train($1, $2, $3, $4, $5, $6, $7, $8, |
| $9, $10, $11, $12, 1000); |
| END; |
| $$ LANGUAGE plpgsql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_train ( |
| tbl_source TEXT, |
| tbl_result TEXT, |
| col_ind_var TEXT, |
| col_dep_var TEXT, |
| regress_family TEXT, |
| alpha DOUBLE PRECISION, |
| lambda_value DOUBLE PRECISION, |
| standardization BOOLEAN, |
| grouping_columns TEXT, |
| optimizer TEXT, |
| optimizer_params TEXT |
| ) RETURNS VOID AS $$ |
| BEGIN |
| PERFORM MADLIB_SCHEMA.elastic_net_train($1, $2, $3, $4, $5, $6, $7, $8, |
| $9, $10, $11, NULL); |
| END; |
| $$ LANGUAGE plpgsql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_train ( |
| tbl_source TEXT, |
| tbl_result TEXT, |
| col_ind_var TEXT, |
| col_dep_var TEXT, |
| regress_family TEXT, |
| alpha DOUBLE PRECISION, |
| lambda_value DOUBLE PRECISION, |
| standardization BOOLEAN, |
| grouping_columns TEXT, |
| optimizer TEXT |
| ) RETURNS VOID AS $$ |
| BEGIN |
| PERFORM MADLIB_SCHEMA.elastic_net_train($1, $2, $3, $4, $5, $6, $7, $8, |
| $9, $10, NULL::TEXT); |
| END; |
| $$ LANGUAGE plpgsql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_train ( |
| tbl_source TEXT, |
| tbl_result TEXT, |
| col_ind_var TEXT, |
| col_dep_var TEXT, |
| regress_family TEXT, |
| alpha DOUBLE PRECISION, |
| lambda_value DOUBLE PRECISION, |
| standardization BOOLEAN, |
| grouping_columns TEXT |
| ) RETURNS VOID AS $$ |
| BEGIN |
| PERFORM MADLIB_SCHEMA.elastic_net_train($1, $2, $3, $4, $5, $6, $7, $8, |
| $9, 'FISTA'); |
| END; |
| $$ LANGUAGE plpgsql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_train ( |
| tbl_source TEXT, |
| tbl_result TEXT, |
| col_ind_var TEXT, |
| col_dep_var TEXT, |
| regress_family TEXT, |
| alpha DOUBLE PRECISION, |
| lambda_value DOUBLE PRECISION, |
| standardization BOOLEAN |
| ) RETURNS VOID AS $$ |
| BEGIN |
| PERFORM MADLIB_SCHEMA.elastic_net_train($1, $2, $3, $4, $5, $6, $7, $8, |
| NULL); |
| END; |
| $$ LANGUAGE plpgsql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_train ( |
| tbl_source TEXT, |
| tbl_result TEXT, |
| col_ind_var TEXT, |
| col_dep_var TEXT, |
| regress_family TEXT, |
| alpha DOUBLE PRECISION, |
| lambda_value DOUBLE PRECISION |
| ) RETURNS VOID AS $$ |
| BEGIN |
| PERFORM MADLIB_SCHEMA.elastic_net_train($1, $2, $3, $4, $5, $6, $7, True); |
| END; |
| $$ LANGUAGE plpgsql VOLATILE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| ------------------------------------------------------------------------ |
| |
| /** |
| * @brief Help function, to print out the supported families |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_train () |
| RETURNS TEXT AS $$ |
| PythonFunction(elastic_net, elastic_net, elastic_net_help) |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| ------------------------------------------------------------------------ |
| |
| /** |
| * @brief Help function, to print out the supported optimizer for a family |
| * or print out the parameter list for an optimizer |
| * |
| * @param family_or_optimizer Response type, 'gaussian' or 'binomial', or |
| * optimizer type |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_train ( |
| family_or_optimizer TEXT |
| ) RETURNS TEXT AS $$ |
| PythonFunction(elastic_net, elastic_net, elastic_net_help) |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| ------------------------------------------------------------------------ |
| ------------------------------------------------------------------------ |
| ------------------------------------------------------------------------ |
| |
| /** |
| * @brief Prediction and put the result in a table |
| * can be used together with General-CV |
| * @param tbl_model The result from elastic_net_train |
| * @param tbl_new_source Data table |
| * @param col_id Unique ID associated with each row |
| * @param tbl_predict Prediction result |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_predict ( |
| tbl_model TEXT, |
| tbl_new_source TEXT, |
| col_id TEXT, |
| tbl_predict TEXT |
| ) RETURNS VOID AS $$ |
| PythonFunction(elastic_net, elastic_net, elastic_net_predict_all) |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| ------------------------------------------------------------------------ |
| |
| /** |
| * @brief Prediction use learned coefficients for a given example |
| * |
| * @param regress_family model family |
| * @param coefficients The fitting coefficients |
| * @param intercept The fitting intercept |
| * @param ind_var Features (independent variables) |
| * @returns float8 |
| * |
| * Note: Please use function elastic_net_gaussian_predict() or |
| * elastic_net_binomial_predict() instead if you could. |
| * This may be deprecated in the future, as users are confused between this |
| * function and the table function with the same name. |
| * |
| * When regress_family is 'binomial' or 'logistic', |
| * this function returns 1 for True and 0 for False |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_predict ( |
| regress_family TEXT, |
| coefficients DOUBLE PRECISION[], |
| intercept DOUBLE PRECISION, |
| ind_var DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION AS $$ |
| DECLARE |
| family_name TEXT; |
| binomial_result BOOLEAN; |
| BEGIN |
| family_name := lower(regress_family); |
| |
| IF family_name = 'gaussian' OR family_name = 'linear' THEN |
| RETURN MADLIB_SCHEMA.elastic_net_gaussian_predict(coefficients, intercept, ind_var); |
| END IF; |
| |
| IF family_name = 'binomial' OR family_name = 'logistic' THEN |
| binomial_result := MADLIB_SCHEMA.elastic_net_binomial_predict(coefficients, intercept, ind_var); |
| IF binomial_result THEN |
| return 1; |
| ELSE |
| return 0; |
| END IF; |
| END IF; |
| |
| RAISE EXCEPTION 'This regression family is not supported!'; |
| END; |
| $$ LANGUAGE plpgsql IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| ------------------------------------------------------------------------ |
| |
| /** |
| * @brief Prediction for linear models use learned coefficients for a given example |
| * |
| * @param coefficients Linear fitting coefficients |
| * @param intercept Linear fitting intercept |
| * @param ind_var Features (independent variables) |
| * |
| * returns a double value |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_gaussian_predict ( |
| coefficients DOUBLE PRECISION[], |
| intercept DOUBLE PRECISION, |
| ind_var DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION AS |
| 'MODULE_PATHNAME', '__elastic_net_gaussian_predict' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| ------------------------------------------------------------------------ |
| /** |
| * @brief Prediction for logistic models use learned coefficients for a given example |
| * |
| * @param coefficients Logistic fitting coefficients |
| * @param intercept Logistic fitting intercept |
| * @param ind_var Features (independent variables) |
| * |
| * returns a boolean value |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_binomial_predict ( |
| coefficients DOUBLE PRECISION[], |
| intercept DOUBLE PRECISION, |
| ind_var DOUBLE PRECISION[] |
| ) RETURNS BOOLEAN AS |
| 'MODULE_PATHNAME', '__elastic_net_binomial_predict' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| ------------------------------------------------------------------------ |
| /** |
| * @brief Compute the probability of belonging to the True class for a given observation |
| * |
| * @param coefficients Logistic fitting coefficients |
| * @param intercept Logistic fitting intercept |
| * @param ind_var Features (independent variables) |
| * |
| * returns a double value, which is the probability of this data point being True class |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.elastic_net_binomial_prob ( |
| coefficients DOUBLE PRECISION[], |
| intercept DOUBLE PRECISION, |
| ind_var DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION AS |
| 'MODULE_PATHNAME', '__elastic_net_binomial_prob' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| ------------------------------------------------------------------------ |
| /* Compute the log-likelihood for one data point */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__elastic_net_binomial_loglikelihood ( |
| coefficients DOUBLE PRECISION[], |
| intercept DOUBLE PRECISION, |
| dep_var BOOLEAN, |
| ind_var DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION AS |
| 'MODULE_PATHNAME', '__elastic_net_binomial_loglikelihood' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| ------------------------------------------------------------------------ |
| -- Compute the solution for just one step ------------------------------ |
| ------------------------------------------------------------------------ |
| |
| DROP TYPE IF EXISTS MADLIB_SCHEMA.__elastic_net_result CASCADE; |
| CREATE TYPE MADLIB_SCHEMA.__elastic_net_result AS ( |
| intercept DOUBLE PRECISION, |
| coefficients DOUBLE PRECISION[], |
| lambda_value DOUBLE PRECISION |
| ); |
| |
| ------------------------------------------------------------------------ |
| |
| /* IGD */ |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gaussian_igd_transition ( |
| state DOUBLE PRECISION[], |
| ind_var DOUBLE PRECISION[], |
| dep_var DOUBLE PRECISION, |
| pre_state DOUBLE PRECISION[], |
| lambda DOUBLE PRECISION, |
| alpha DOUBLE PRECISION, |
| dimension INTEGER, |
| stepsize DOUBLE PRECISION, |
| total_rows INTEGER, |
| xmean DOUBLE PRECISION[], |
| ymean DOUBLE PRECISION, |
| step_decay DOUBLE PRECISION |
| ) RETURNS DOUBLE PRECISION[] |
| AS 'MODULE_PATHNAME', 'gaussian_igd_transition' |
| LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| -- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gaussian_igd_merge ( |
| state1 DOUBLE PRECISION[], |
| state2 DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION[] AS |
| 'MODULE_PATHNAME', 'gaussian_igd_merge' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| -- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gaussian_igd_final ( |
| state DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION[] AS |
| 'MODULE_PATHNAME', 'gaussian_igd_final' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| /* |
| * Perform one iteration step of IGD for linear models |
| */ |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.__gaussian_igd_step( |
| /* ind_var */ DOUBLE PRECISION[], |
| /* dep_var */ DOUBLE PRECISION, |
| /* pre_state */ DOUBLE PRECISION[], |
| /* lambda */ DOUBLE PRECISION, |
| /* alpha */ DOUBLE PRECISION, |
| /* dimension */ INTEGER, |
| /* stepsize */ DOUBLE PRECISION, |
| /* total_rows */ INTEGER, |
| /* xmeans */ DOUBLE PRECISION[], |
| /* ymean */ DOUBLE PRECISION, |
| /* step_decay */ DOUBLE PRECISION |
| ); |
| CREATE AGGREGATE MADLIB_SCHEMA.__gaussian_igd_step( |
| /* ind_var */ DOUBLE PRECISION[], |
| /* dep_var */ DOUBLE PRECISION, |
| /* pre_state */ DOUBLE PRECISION[], |
| /* lambda */ DOUBLE PRECISION, |
| /* alpha */ DOUBLE PRECISION, |
| /* dimension */ INTEGER, |
| /* stepsize */ DOUBLE PRECISION, |
| /* total_rows */ INTEGER, |
| /* xmeans */ DOUBLE PRECISION[], |
| /* ymean */ DOUBLE PRECISION, |
| /* step_decay */ DOUBLE PRECISION |
| ) ( |
| SType = DOUBLE PRECISION[], |
| SFunc = MADLIB_SCHEMA.__gaussian_igd_transition, |
| m4_ifdef(`__POSTGRESQL__', `', `prefunc = MADLIB_SCHEMA.__gaussian_igd_merge,') |
| FinalFunc = MADLIB_SCHEMA.__gaussian_igd_final, |
| InitCond = '{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}' |
| ); |
| |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.__gaussian_igd_step_single_seg ( |
| /* ind_var */ DOUBLE PRECISION[], |
| /* dep_var */ DOUBLE PRECISION, |
| /* pre_state */ DOUBLE PRECISION[], |
| /* lambda */ DOUBLE PRECISION, |
| /* alpha */ DOUBLE PRECISION, |
| /* dimension */ INTEGER, |
| /* stepsize */ DOUBLE PRECISION, |
| /* total_rows */ INTEGER, |
| /* xmeans */ DOUBLE PRECISION[], |
| /* ymean */ DOUBLE PRECISION, |
| /* step_decay */ DOUBLE PRECISION |
| ); |
| CREATE AGGREGATE MADLIB_SCHEMA.__gaussian_igd_step_single_seg ( |
| /* ind_var */ DOUBLE PRECISION[], |
| /* dep_var */ DOUBLE PRECISION, |
| /* pre_state */ DOUBLE PRECISION[], |
| /* lambda */ DOUBLE PRECISION, |
| /* alpha */ DOUBLE PRECISION, |
| /* dimension */ INTEGER, |
| /* stepsize */ DOUBLE PRECISION, |
| /* total_rows */ INTEGER, |
| /* xmeans */ DOUBLE PRECISION[], |
| /* ymean */ DOUBLE PRECISION, |
| /* step_decay */ DOUBLE PRECISION |
| ) ( |
| SType = DOUBLE PRECISION[], |
| SFunc = MADLIB_SCHEMA.__gaussian_igd_transition, |
| FinalFunc = MADLIB_SCHEMA.__gaussian_igd_final, |
| InitCond = '{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}' |
| ); |
| |
| -- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gaussian_igd_state_diff ( |
| state1 DOUBLE PRECISION[], |
| state2 DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION AS |
| 'MODULE_PATHNAME', '__gaussian_igd_state_diff' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| -- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gaussian_igd_result ( |
| in_state DOUBLE PRECISION[], |
| feature_sq DOUBLE PRECISION[], |
| threshold DOUBLE PRECISION, |
| tolerance DOUBLE PRECISION |
| ) RETURNS MADLIB_SCHEMA.__elastic_net_result AS |
| 'MODULE_PATHNAME', '__gaussian_igd_result' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| ------------------------------------------------------------------------ |
| |
| /* FISTA */ |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gaussian_fista_transition ( |
| state DOUBLE PRECISION[], |
| ind_var DOUBLE PRECISION[], |
| dep_var DOUBLE PRECISION, |
| pre_state DOUBLE PRECISION[], |
| lambda DOUBLE PRECISION, |
| alpha DOUBLE PRECISION, |
| dimension INTEGER, |
| total_rows INTEGER, |
| max_stepsize DOUBLE PRECISION, |
| eta DOUBLE PRECISION, |
| use_active_set INTEGER, |
| is_active INTEGER, |
| random_stepsize INTEGER |
| ) RETURNS DOUBLE PRECISION[] |
| AS 'MODULE_PATHNAME', 'gaussian_fista_transition' |
| LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| -- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gaussian_fista_merge ( |
| state1 DOUBLE PRECISION[], |
| state2 DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION[] AS |
| 'MODULE_PATHNAME', 'gaussian_fista_merge' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| -- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gaussian_fista_final ( |
| state DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION[] AS |
| 'MODULE_PATHNAME', 'gaussian_fista_final' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| /* |
| Perform one iteration step of FISTA for linear models |
| */ |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.__gaussian_fista_step( |
| /* ind_var */ DOUBLE PRECISION[], |
| /* dep_var */ DOUBLE PRECISION, |
| /* pre_state */ DOUBLE PRECISION[], |
| /* lambda */ DOUBLE PRECISION, |
| /* alpha */ DOUBLE PRECISION, |
| /* dimension */ INTEGER, |
| /* total_rows */ INTEGER, |
| /* max_stepsize */ DOUBLE PRECISION, |
| /* eta */ DOUBLE PRECISION, |
| /* use_active_set */ INTEGER, |
| /* is_active */ INTEGER, |
| /* random_stepsize */ INTEGER |
| ); |
| CREATE AGGREGATE MADLIB_SCHEMA.__gaussian_fista_step( |
| /* ind_var */ DOUBLE PRECISION[], |
| /* dep_var */ DOUBLE PRECISION, |
| /* pre_state */ DOUBLE PRECISION[], |
| /* lambda */ DOUBLE PRECISION, |
| /* alpha */ DOUBLE PRECISION, |
| /* dimension */ INTEGER, |
| /* total_rows */ INTEGER, |
| /* max_stepsize */ DOUBLE PRECISION, |
| /* eta */ DOUBLE PRECISION, |
| /* use_active_set */ INTEGER, |
| /* is_active */ INTEGER, |
| /* random_stepsize */ INTEGER |
| ) ( |
| SType = DOUBLE PRECISION[], |
| SFunc = MADLIB_SCHEMA.__gaussian_fista_transition, |
| m4_ifdef(`__POSTGRESQL__', `', `prefunc = MADLIB_SCHEMA.__gaussian_fista_merge,') |
| FinalFunc = MADLIB_SCHEMA.__gaussian_fista_final, |
| InitCond = '{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}' |
| ); |
| |
| -- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gaussian_fista_state_diff ( |
| state1 DOUBLE PRECISION[], |
| state2 DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION AS |
| 'MODULE_PATHNAME', '__gaussian_fista_state_diff' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| -- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__gaussian_fista_result ( |
| in_state DOUBLE PRECISION[] |
| ) RETURNS MADLIB_SCHEMA.__elastic_net_result AS |
| 'MODULE_PATHNAME', '__gaussian_fista_result' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| ------------------------------------------------------------------------ |
| ------------------------------------------------------------------------ |
| ------------------------------------------------------------------------ |
| |
| /* Binomial IGD */ |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__binomial_igd_transition ( |
| state DOUBLE PRECISION[], |
| ind_var DOUBLE PRECISION[], |
| dep_var BOOLEAN, |
| pre_state DOUBLE PRECISION[], |
| lambda DOUBLE PRECISION, |
| alpha DOUBLE PRECISION, |
| dimension INTEGER, |
| stepsize DOUBLE PRECISION, |
| total_rows INTEGER, |
| xmean DOUBLE PRECISION[], |
| ymean DOUBLE PRECISION, |
| step_decay DOUBLE PRECISION |
| ) RETURNS DOUBLE PRECISION[] |
| AS 'MODULE_PATHNAME', 'binomial_igd_transition' |
| LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| -- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__binomial_igd_merge ( |
| state1 DOUBLE PRECISION[], |
| state2 DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION[] AS |
| 'MODULE_PATHNAME', 'binomial_igd_merge' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| -- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__binomial_igd_final ( |
| state DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION[] AS |
| 'MODULE_PATHNAME', 'binomial_igd_final' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| /* |
| * Perform one iteration step of IGD for linear models |
| */ |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.__binomial_igd_step( |
| /* ind_var */ DOUBLE PRECISION[], |
| /* dep_var */ BOOLEAN, |
| /* pre_state */ DOUBLE PRECISION[], |
| /* lambda */ DOUBLE PRECISION, |
| /* alpha */ DOUBLE PRECISION, |
| /* dimension */ INTEGER, |
| /* stepsize */ DOUBLE PRECISION, |
| /* total_rows */ INTEGER, |
| /* xmeans */ DOUBLE PRECISION[], |
| /* ymean */ DOUBLE PRECISION, |
| /* step_decay */ DOUBLE PRECISION |
| ); |
| CREATE AGGREGATE MADLIB_SCHEMA.__binomial_igd_step( |
| /* ind_var */ DOUBLE PRECISION[], |
| /* dep_var */ BOOLEAN, |
| /* pre_state */ DOUBLE PRECISION[], |
| /* lambda */ DOUBLE PRECISION, |
| /* alpha */ DOUBLE PRECISION, |
| /* dimension */ INTEGER, |
| /* stepsize */ DOUBLE PRECISION, |
| /* total_rows */ INTEGER, |
| /* xmeans */ DOUBLE PRECISION[], |
| /* ymean */ DOUBLE PRECISION, |
| /* step_decay */ DOUBLE PRECISION |
| ) ( |
| SType = DOUBLE PRECISION[], |
| SFunc = MADLIB_SCHEMA.__binomial_igd_transition, |
| m4_ifdef(`__POSTGRESQL__', `', `prefunc = MADLIB_SCHEMA.__binomial_igd_merge,') |
| FinalFunc = MADLIB_SCHEMA.__binomial_igd_final, |
| InitCond = '{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}' |
| ); |
| |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.__binomial_igd_step_single_seg ( |
| /* ind_var */ DOUBLE PRECISION[], |
| /* dep_var */ BOOLEAN, |
| /* pre_state */ DOUBLE PRECISION[], |
| /* lambda */ DOUBLE PRECISION, |
| /* alpha */ DOUBLE PRECISION, |
| /* dimension */ INTEGER, |
| /* stepsize */ DOUBLE PRECISION, |
| /* total_rows */ INTEGER, |
| /* xmeans */ DOUBLE PRECISION[], |
| /* ymean */ DOUBLE PRECISION, |
| /* step_decay */ DOUBLE PRECISION |
| ); |
| CREATE AGGREGATE MADLIB_SCHEMA.__binomial_igd_step_single_seg ( |
| /* ind_var */ DOUBLE PRECISION[], |
| /* dep_var */ BOOLEAN, |
| /* pre_state */ DOUBLE PRECISION[], |
| /* lambda */ DOUBLE PRECISION, |
| /* alpha */ DOUBLE PRECISION, |
| /* dimension */ INTEGER, |
| /* stepsize */ DOUBLE PRECISION, |
| /* total_rows */ INTEGER, |
| /* xmeans */ DOUBLE PRECISION[], |
| /* ymean */ DOUBLE PRECISION, |
| /* step_decay */ DOUBLE PRECISION |
| ) ( |
| SType = DOUBLE PRECISION[], |
| SFunc = MADLIB_SCHEMA.__binomial_igd_transition, |
| FinalFunc = MADLIB_SCHEMA.__binomial_igd_final, |
| InitCond = '{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}' |
| ); |
| |
| -- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__binomial_igd_state_diff ( |
| state1 DOUBLE PRECISION[], |
| state2 DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION AS |
| 'MODULE_PATHNAME', '__binomial_igd_state_diff' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| -- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__binomial_igd_result ( |
| in_state DOUBLE PRECISION[], |
| feature_sq DOUBLE PRECISION[], |
| threshold DOUBLE PRECISION, |
| tolerance DOUBLE PRECISION |
| ) RETURNS MADLIB_SCHEMA.__elastic_net_result AS |
| 'MODULE_PATHNAME', '__binomial_igd_result' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| ------------------------------------------------------------------------ |
| |
| /* Binomial FISTA */ |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__binomial_fista_transition ( |
| state DOUBLE PRECISION[], |
| ind_var DOUBLE PRECISION[], |
| dep_var BOOLEAN, |
| pre_state DOUBLE PRECISION[], |
| lambda DOUBLE PRECISION, |
| alpha DOUBLE PRECISION, |
| dimension INTEGER, |
| total_rows INTEGER, |
| max_stepsize DOUBLE PRECISION, |
| eta DOUBLE PRECISION, |
| use_active_set INTEGER, |
| is_active INTEGER, |
| random_stepsize INTEGER |
| ) RETURNS DOUBLE PRECISION[] |
| AS 'MODULE_PATHNAME', 'binomial_fista_transition' |
| LANGUAGE C IMMUTABLE |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| -- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__binomial_fista_merge ( |
| state1 DOUBLE PRECISION[], |
| state2 DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION[] AS |
| 'MODULE_PATHNAME', 'binomial_fista_merge' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| -- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__binomial_fista_final ( |
| state DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION[] AS |
| 'MODULE_PATHNAME', 'binomial_fista_final' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| /* |
| Perform one iteration step of FISTA for linear models |
| */ |
| DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.__binomial_fista_step( |
| /* ind_var */ DOUBLE PRECISION[], |
| /* dep_var */ BOOLEAN, |
| /* pre_state */ DOUBLE PRECISION[], |
| /* lambda */ DOUBLE PRECISION, |
| /* alpha */ DOUBLE PRECISION, |
| /* dimension */ INTEGER, |
| /* total_rows */ INTEGER, |
| /* max_stepsize */ DOUBLE PRECISION, |
| /* eta */ DOUBLE PRECISION, |
| /* use_active_set */ INTEGER, |
| /* is_active */ INTEGER, |
| /* random_stepsize */ INTEGER |
| ); |
| CREATE AGGREGATE MADLIB_SCHEMA.__binomial_fista_step( |
| /* ind_var */ DOUBLE PRECISION[], |
| /* dep_var */ BOOLEAN, |
| /* pre_state */ DOUBLE PRECISION[], |
| /* lambda */ DOUBLE PRECISION, |
| /* alpha */ DOUBLE PRECISION, |
| /* dimension */ INTEGER, |
| /* total_rows */ INTEGER, |
| /* max_stepsize */ DOUBLE PRECISION, |
| /* eta */ DOUBLE PRECISION, |
| /* use_active_set */ INTEGER, |
| /* is_active */ INTEGER, |
| /* random_stepsize */ INTEGER |
| ) ( |
| SType = DOUBLE PRECISION[], |
| SFunc = MADLIB_SCHEMA.__binomial_fista_transition, |
| m4_ifdef(`__POSTGRESQL__', `', `prefunc = MADLIB_SCHEMA.__binomial_fista_merge,') |
| FinalFunc = MADLIB_SCHEMA.__binomial_fista_final, |
| InitCond = '{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}' |
| ); |
| |
| -- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__binomial_fista_state_diff ( |
| state1 DOUBLE PRECISION[], |
| state2 DOUBLE PRECISION[] |
| ) RETURNS DOUBLE PRECISION AS |
| 'MODULE_PATHNAME', '__binomial_fista_state_diff' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| -- |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__binomial_fista_result ( |
| in_state DOUBLE PRECISION[] |
| ) RETURNS MADLIB_SCHEMA.__elastic_net_result AS |
| 'MODULE_PATHNAME', '__binomial_fista_result' |
| LANGUAGE C IMMUTABLE STRICT |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `'); |
| |
| |