| /* ----------------------------------------------------------------------- *//** |
| * |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| * |
| * @file pred_metrics.sql_in |
| * |
| * @brief A collection of summary statistics to gauge model |
| * accuracy based on predicted values vs. ground-truth values. |
| * @date April 2016 |
| * |
| * |
| *//* ----------------------------------------------------------------------- */ |
| |
| m4_include(`SQLCommon.m4') |
| |
| /* ----------------------------------------------------------------------- */ |
| |
| /** |
| @addtogroup grp_pred |
| |
| <div class="toc"><b>Contents</b> |
| <ul> |
| <li><a href="#list">List of Prediction Metric Functions</a></li> |
| <li><a href="#specs">Function Specific Details</a></li> |
| <li><a href="#examples">Examples</a></li> |
| <li><a href="#literature">Literature</a></li> |
| <li><a href="#related">Related Topics</a></li> |
| </ul> |
| </div> |
| |
| @brief Provides various prediction accuracy metrics. |
| |
| This module provides a set of metrics to evaluate the quality of predictions of a model. |
| A typical function will take a set of "prediction" and |
| "observation" values and use them to calculate the desired metric, unless noted otherwise. |
| Grouping is supported for all functions (except confusion matrix). |
| |
| @anchor list |
| @par Prediction Metrics Functions |
| <table class="output"> |
| <tr><th>mean_abs_error(table_in, table_out, prediction_col, observed_col, |
| grouping_cols)</th><td> Mean absolute error </td></tr> |
| <tr><th>mean_abs_perc_error(table_in, table_out, prediction_col, observed_col, |
| grouping_cols)</th><td> Mean absolute percentage error </td></tr> |
| <tr><th>mean_perc_error(table_in, table_out, prediction_col, observed_col, |
| grouping_cols)</th><td> Mean percentage error </td></tr> |
| <tr><th>mean_squared_error(table_in, table_out, prediction_col, observed_col, |
| grouping_cols)</th><td> Mean squared error</td></tr> |
| <tr><th>r2_score(table_in, table_out, prediction_col, observed_col, |
| grouping_cols)</th><td> R-squared </td></tr> |
| <tr><th>adjusted_r2_score(table_in, table_out, prediction_col, observed_col, |
| num_predictors, training_size, grouping_cols)</th><td> Adjusted R-squared |
| </td></tr> |
| <tr><th>binary_classifier(table_in, table_out, prediction_col, observed_col, |
| grouping_cols)</th><td> Collection of prediction metrics related to binary classification</td></tr> |
| <tr><th>area_under_roc(table_in, table_out, prediction_col, observed_col, |
| grouping_cols)</th><td> Area under the ROC curve (in binary classification) |
| </td></tr> |
| <tr><th>confusion_matrix(table_in, table_out, prediction_col, observed_col, |
| grouping_cols)</th><td> Confusion matrix for a multi-class classifier |
| </td></tr> |
| </table> |
| |
| \b Arguments |
| <DL class="arglist"> |
| <DT>table_in</DT> |
| <DD>TEXT. Name of the input table.</DD> |
| <DT>table_out</DT> |
| <DD>TEXT. Name of the output table. For consistency, a table is created for all metric outputs even when grouping is not used, which may mean there is only a single value in the output table in some cases. </DD> |
| <DT>prediction_col</DT> |
| <DD>TEXT. Name of the column of predicted values from input table.</DD> |
| <DT>observed_col</DT> |
| <DD>TEXT. Name of the column of observed values from input table.</DD> |
| <DT>num_predictors (for adjusted R-squared score only)</DT> |
| <DD>INTEGER. The number of parameters in the predicting model, not counting the |
| constant term.</DD> |
| <DT>training_size (for adjusted R-squared score only)</DT> |
| <DD>INTEGER. The number of rows used for training, excluding any NULL rows.</DD> |
| <DT>grouping_cols (optional)</DT> |
| <DD>TEXT, default: NULL. Name of the column of grouping values from input |
| table.</DD> |
| </DL> |
| |
| @anchor specs |
| @par Function Specific Details |
| |
| <b>R-squared Score</b> |
| |
| This function returns the coefficient of determination (R2) between the |
| predicted and observed values. An R2 of 1 indicates that the regression line |
| perfectly fits the data, while an R2 of 0 indicates that the line does not fit |
| the data at all. Negative values of R2 may occur when fitting non-linear |
| functions to data. Please refer to reference <a href="#r2">[1]</a> for |
| more details. |
| |
| <b>Adjusted R-squared Score</b> |
| |
| This function returns the adjusted R2 score in addition to the R-squared score |
| described above. Adjusted R2 score is used to |
| counter the problem of the R2 automatically increasing when extra explanatory |
| variables are added to the model. It takes two additional parameters describing |
| the degrees of freedom of the model (num_predictors) and the size of the |
| training set over which it was developed (training_size): |
| |
| - num_predictors: Indicates the number of parameters the model has other than |
| the constant term. For example, if it is set to '3' the model may take the |
| following form as an example: 7 + 5x + 39y + 0.91z. |
| - training_size: Indicates the number of rows in the training set (excluding |
| any NULL rows). |
| |
| Neither of these arguments can be deduced from the predicted values and the test |
| data alone which is why they are explicit inputs. Please refer to |
| reference <a href="#r2">[1]</a> for more details. |
| |
| @anchor bc |
| <b>Binary Classification</b> |
| |
| This function returns an output table with a number of metrics commonly used in |
| binary classification. |
| |
| The definitions of the various metrics are as follows: |
| |
| - \f$\textit{tp}\f$ is the count of correctly-classified positives. |
| - \f$\textit{tn}\f$ is the count of correctly-classified negatives. |
| - \f$\textit{fp}\f$ is the count of misclassified negatives. |
| - \f$\textit{fn}\f$ is the count of misclassified positives. |
| - \f$\textit{tpr}=\textit{tp}/(\textit{tp}+\textit{fn})\f$. |
| - \f$\textit{tnr}=\textit{tn}/(\textit{fp}+\textit{tn})\f$. |
| - \f$\textit{ppv}=\textit{tp}/(\textit{tp}+\textit{fp})\f$. |
| - \f$\textit{npv}=\textit{tn}/(\textit{tn}+\textit{fn})\f$. |
| - \f$\textit{fpr}=\textit{fp}/(\textit{fp}+\textit{tn})\f$. |
| - \f$\textit{fdr}=1-\textit{ppv}\f$. |
| - \f$\textit{fnr}=\textit{fn}/(\textit{fn}+\textit{tp})\f$. |
| - \f$\textit{acc}=(\textit{tp}+\textit{tn})/(\textit{tp}+\textit{tn}+\textit{fp} |
| +\textit{fn})\f$. |
| - \f$\textit{f1}=2*\textit{tp}/(2*\textit{tp}+\textit{fp}+\textit{fn})\f$. |
| |
| |
| <b>Area Under ROC Curve</b> |
| |
| This function returns the area under the Receiver Operating Characteristic curve |
| for binary classification (the AUC). The ROC curve is the curve relating the |
| classifier's TPR and FPR metrics. (See <a href="#bc">Binary Classification</a> |
| above for a definition of these metrics). Please refer to reference <a |
| href="#aoc">[2]</a> for more details. Note that the binary classification |
| function can be used to obtain the data (TPR and FPR values) required for |
| drawing the ROC curve. |
| |
| @note For 'binary_classifier' and 'area_under_roc' functions: |
| - The 'observed_col' column is assumed to be a numeric column with |
| two values: 0 and 1, or a Boolean column. For the purposes of the metric |
| calculation, 0 is considered to be negative and 1 to be positive. |
| - The 'pred_col' column is expected to contain numeric values corresponding to |
| likelihood/probability. A larger value corresponds to greater certainty that the |
| observed value will be '1', and a lower value corresponds to a greater certainty that |
| it will be '0'. |
| |
| <b>Confusion Matrix</b> |
| |
| This function returns the confusion matrix of a multi-class classification. Each |
| column of the matrix represents the instances in a predicted class while each |
| row represents the instances in an actual class. This allows more detailed |
| analysis than mere proportion of correct guesses (accuracy). Please refer to the |
| reference <a href="#cm">[3]</a> for more details. Please note that grouping |
| is not supported for the confusion matrix. |
| |
| @anchor examples |
| @examp |
| |
| -# Create the sample data: |
| <pre class="example"> |
| DROP TABLE IF EXISTS test_set; |
| CREATE TABLE test_set( |
| pred FLOAT8, |
| obs FLOAT8 |
| ); |
| INSERT INTO test_set VALUES |
| (37.5,53.1), (12.3,34.2), (74.2,65.4), (91.1,82.1); |
| </pre> |
| |
| -# Run the Mean Absolute Error function: |
| <pre class="example"> |
| DROP TABLE IF EXISTS table_out; |
| SELECT madlib.mean_abs_error( 'test_set', 'table_out', 'pred', 'obs'); |
| SELECT * FROM table_out; |
| </pre> |
| Result |
| <pre class="result"> |
| mean_abs_error |
| ---------------- |
| 13.825 |
| </pre> |
| |
| -# Run the Mean Absolute Percentage Error function: |
| <pre class="example"> |
| DROP TABLE IF EXISTS table_out; |
| SELECT madlib.mean_abs_perc_error( 'test_set', 'table_out', 'pred', 'obs'); |
| SELECT * FROM table_out; |
| </pre> |
| Result |
| <pre class="result"> |
| mean_abs_perc_error |
| --------------------- |
| 0.294578793636013 |
| </pre> |
| |
| -# Run the Mean Percentage Error function: |
| <pre class="example"> |
| DROP TABLE IF EXISTS table_out; |
| SELECT madlib.mean_perc_error( 'test_set', 'table_out', 'pred', 'obs'); |
| SELECT * FROM table_out; |
| </pre> |
| Result |
| <pre class="result"> |
| mean_perc_error |
| ------------------- |
| -0.17248930032771 |
| </pre> |
| |
| -# Run the Mean Squared Error function: |
| <pre class="example"> |
| DROP TABLE IF EXISTS table_out; |
| SELECT madlib.mean_squared_error( 'test_set', 'table_out', 'pred', 'obs'); |
| SELECT * FROM table_out; |
| </pre> |
| Result |
| <pre class="result"> |
| mean_squared_error |
| -------------------- |
| 220.3525 |
| </pre> |
| |
| -# Run the R2 Score function: |
| <pre class="example"> |
| DROP TABLE IF EXISTS table_out; |
| SELECT madlib.r2_score( 'test_set', 'table_out', 'pred', 'obs'); |
| SELECT * FROM table_out; |
| </pre> |
| Result |
| <pre class="result"> |
| r2_score |
| ------------------------ |
| 0.27992908844337695865 |
| </pre> |
| |
| -# Run the Adjusted R2 Score function: |
| <pre class="example"> |
| DROP TABLE IF EXISTS table_out; |
| SELECT madlib.adjusted_r2_score( 'test_set', 'table_out', 'pred', 'obs', 3, 100); |
| SELECT * FROM table_out; |
| </pre> |
| Result |
| <pre class="result"> |
| r2_score | adjusted_r2_score |
| --------------------+------------------ |
| 0.279929088443375 | 0.257426872457231 |
| </pre> |
| |
| -# Create the sample data for binary classifier metrics: |
| <pre class="example"> |
| DROP TABLE IF EXISTS test_set; |
| CREATE TABLE test_set AS |
| SELECT ((a*8)::integer)/8.0 pred, |
| ((a*0.5+random()*0.5)>0.5) obs |
| FROM (select random() as a from generate_series(1,100)) x; |
| </pre> |
| |
| -# Run the Binary Classifier metrics function: |
| <pre class="example"> |
| DROP TABLE IF EXISTS table_out; |
| SELECT madlib.binary_classifier( 'test_set', 'table_out', 'pred', 'obs'); |
| </pre> |
| |
| -# View the True Positive Rate and the False Positive Rate: |
| <pre class="example"> |
| SELECT threshold, tpr, fpr FROM table_out ORDER BY threshold; |
| </pre> |
| Result (your results for this and other functions below will look different due to the presence of the random function in sample data generator): |
| <pre class="result"> |
| threshold | tpr | fpr |
| ------------------------+------------------------+------------------------ |
| 0.00000000000000000000 | 1.00000000000000000000 | 1.00000000000000000000 |
| 0.12500000000000000000 | 1.00000000000000000000 | 0.94915254237288135593 |
| 0.25000000000000000000 | 0.92682926829268292683 | 0.64406779661016949153 |
| 0.37500000000000000000 | 0.80487804878048780488 | 0.47457627118644067797 |
| 0.50000000000000000000 | 0.70731707317073170732 | 0.35593220338983050847 |
| 0.62500000000000000000 | 0.63414634146341463415 | 0.25423728813559322034 |
| 0.75000000000000000000 | 0.48780487804878048780 | 0.06779661016949152542 |
| 0.87500000000000000000 | 0.29268292682926829268 | 0.03389830508474576271 |
| 1.00000000000000000000 | 0.12195121951219512195 | 0.00000000000000000000 |
| </pre> |
| |
| -# View all metrics at a given threshold value: |
| <pre class="example"> |
| -- Set extended display on for easier reading of output |
| \\x on |
| SELECT * FROM table_out WHERE threshold=0.5; |
| </pre> |
| Result |
| <pre class="result"> |
| -[ RECORD 1 ]--------------------- |
| threshold | 0.50000000000000000000 |
| tp | 29 |
| fp | 21 |
| fn | 12 |
| tn | 38 |
| tpr | 0.70731707317073170732 |
| tnr | 0.64406779661016949153 |
| ppv | 0.58000000000000000000 |
| npv | 0.76000000000000000000 |
| fpr | 0.35593220338983050847 |
| fdr | 0.42000000000000000000 |
| fnr | 0.29268292682926829268 |
| acc | 0.67000000000000000000 |
| f1 | 0.63736263736263736264 |
| </pre> |
| |
| -# Run the Area Under ROC curve function: |
| <pre class="example"> |
| DROP TABLE IF EXISTS table_out; |
| SELECT madlib.area_under_roc( 'test_set', 'table_out', 'pred', 'obs'); |
| SELECT * FROM table_out; |
| </pre> |
| Result |
| <pre class="result"> |
| area_under_roc |
| --------------------------------------------- |
| 0.77428689541132699462698842496899545266640 |
| </pre> |
| |
| -# Create the sample data for confusion matrix. |
| <pre class="example"> |
| DROP TABLE IF EXISTS test_set; |
| CREATE TABLE test_set AS |
| SELECT (x+y)%5+1 AS pred, |
| (x*y)%5 AS obs |
| FROM generate_series(1,5) x, |
| generate_series(1,5) y; |
| </pre> |
| -# Run the confusion matrix function: |
| <pre class="example"> |
| DROP TABLE IF EXISTS table_out; |
| SELECT madlib.confusion_matrix( 'test_set', 'table_out', 'pred', 'obs'); |
| SELECT * FROM table_out ORDER BY class; |
| </pre> |
| Result |
| <pre class="result"> |
| class | confusion_arr |
| -------+--------------- |
| 0 | {0,1,2,2,2,2} |
| 1 | {0,2,0,1,1,0} |
| 2 | {0,0,0,2,2,0} |
| 3 | {0,0,2,0,0,2} |
| 4 | {0,2,1,0,0,1} |
| 5 | {0,0,0,0,0,0} |
| </pre> |
| |
| @anchor literature |
| @par Literature |
| |
| @anchor r2 |
| |
| [1] https://en.wikipedia.org/wiki/Coefficient_of_determination |
| |
| @anchor aoc |
| |
| [2] https://en.wikipedia.org/wiki/Receiver_operating_characteristic |
| |
| @anchor cm |
| |
| [3] https://en.wikipedia.org/wiki/Confusion_matrix |
| |
| @anchor related |
| @par Related Topics |
| |
| File pred_metrics.sql_in for list of functions and usage. |
| |
| **/ |
| |
| /** |
| * @brief Mean Absolute Error |
| * |
| * @param table_in Name of the input table. |
| * @param table_out Name of the output table. |
| * @param prediction_col Name of the column of predicted values from input table. |
| * @param observed_col Name of the column of observed values from input table. |
| * @param grouping_cols Name of the column of grouping values from input table. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_abs_error( |
| table_in TEXT, |
| table_out TEXT, |
| prediction_col TEXT, |
| observed_col TEXT, |
| grouping_cols TEXT |
| ) RETURNS VOID |
| AS $$ |
| PythonFunctionBodyOnly(`stats', `pred_metrics') |
| return pred_metrics.mean_abs_error( |
| table_in, table_out, prediction_col, observed_col, grouping_cols) |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_abs_error( |
| table_in TEXT, |
| table_out TEXT, |
| prediction_col TEXT, |
| observed_col TEXT |
| ) RETURNS VOID |
| AS $$ |
| SELECT MADLIB_SCHEMA.mean_abs_error($1,$2,$3,$4,NULL) |
| $$ LANGUAGE SQL |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_abs_error(message TEXT) |
| RETURNS TEXT AS $$ |
| PythonFunctionBodyOnly(`stats', `pred_metrics') |
| return pred_metrics.metric_agg_help_msg(schema_madlib, message, |
| 'mean_abs_error') |
| $$ language plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_abs_error() |
| RETURNS TEXT AS $$ |
| SELECT MADLIB_SCHEMA.mean_abs_error(''::TEXT); |
| $$ language SQL |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| |
| /* ----------------------------------------------------------------------- */ |
| |
| /** |
| * @brief Mean Absolute Percentage Error |
| * |
| * @param table_in Name of the input table. |
| * @param table_out Name of the output table. |
| * @param prediction_col Name of the column of predicted values from input table. |
| * @param observed_col Name of the column of observed values from input table. |
| * @param grouping_cols Name of the column of grouping values from input table. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_abs_perc_error( |
| table_in TEXT, |
| table_out TEXT, |
| prediction_col TEXT, |
| observed_col TEXT, |
| grouping_cols TEXT |
| ) RETURNS VOID |
| AS $$ |
| PythonFunctionBodyOnly(`stats', `pred_metrics') |
| return pred_metrics.mean_abs_perc_error( |
| table_in, table_out, prediction_col, observed_col, grouping_cols) |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_abs_perc_error( |
| table_in TEXT, |
| table_out TEXT, |
| prediction_col TEXT, |
| observed_col TEXT |
| ) RETURNS VOID |
| AS $$ |
| SELECT MADLIB_SCHEMA.mean_abs_perc_error($1,$2,$3,$4,NULL) |
| $$ LANGUAGE SQL |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_abs_perc_error(message TEXT) |
| RETURNS TEXT AS $$ |
| PythonFunctionBodyOnly(`stats', `pred_metrics') |
| return pred_metrics.metric_agg_help_msg(schema_madlib, message, |
| 'mean_abs_perc_error') |
| $$ language plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_abs_perc_error() |
| RETURNS TEXT AS $$ |
| SELECT MADLIB_SCHEMA.mean_abs_perc_error(''::TEXT); |
| $$ language SQL |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| |
| /* ----------------------------------------------------------------------- */ |
| |
| /** |
| * @brief Mean Percentage Error |
| * |
| * @param table_in Name of the input table. |
| * @param table_out Name of the output table. |
| * @param prediction_col Name of the column of predicted values from input table. |
| * @param observed_col Name of the column of observed values from input table. |
| * @param grouping_cols Name of the column of grouping values from input table. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_perc_error( |
| table_in TEXT, |
| table_out TEXT, |
| prediction_col TEXT, |
| observed_col TEXT, |
| grouping_cols TEXT |
| ) RETURNS VOID |
| AS $$ |
| PythonFunctionBodyOnly(`stats', `pred_metrics') |
| return pred_metrics.mean_perc_error( |
| table_in, table_out, prediction_col, observed_col, grouping_cols) |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_perc_error( |
| table_in TEXT, |
| table_out TEXT, |
| prediction_col TEXT, |
| observed_col TEXT |
| ) RETURNS VOID |
| AS $$ |
| SELECT MADLIB_SCHEMA.mean_perc_error($1,$2,$3,$4,NULL) |
| $$ LANGUAGE SQL |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_perc_error(message TEXT) |
| RETURNS TEXT AS $$ |
| PythonFunctionBodyOnly(`stats', `pred_metrics') |
| return pred_metrics.metric_agg_help_msg(schema_madlib, message, |
| 'mean_perc_error') |
| $$ language plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_perc_error() |
| RETURNS TEXT AS $$ |
| SELECT MADLIB_SCHEMA.mean_perc_error(''::TEXT); |
| $$ language SQL |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| |
| /* ----------------------------------------------------------------------- */ |
| |
| /** |
| * @brief Mean Squared Error |
| * |
| * @param table_in Name of the input table. |
| * @param table_out Name of the output table. |
| * @param prediction_col Name of the column of predicted values from input table. |
| * @param observed_col Name of the column of observed values from input table. |
| * @param grouping_cols Name of the column of grouping values from input table. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_squared_error( |
| table_in TEXT, |
| table_out TEXT, |
| prediction_col TEXT, |
| observed_col TEXT, |
| grouping_cols TEXT |
| ) RETURNS VOID |
| AS $$ |
| PythonFunctionBodyOnly(`stats', `pred_metrics') |
| return pred_metrics.mean_squared_error( |
| table_in, table_out, prediction_col, observed_col, grouping_cols) |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_squared_error( |
| table_in TEXT, |
| table_out TEXT, |
| prediction_col TEXT, |
| observed_col TEXT |
| ) RETURNS VOID |
| AS $$ |
| SELECT MADLIB_SCHEMA.mean_squared_error($1,$2,$3,$4,NULL) |
| $$ LANGUAGE SQL |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_squared_error(message TEXT) |
| RETURNS TEXT AS $$ |
| PythonFunctionBodyOnly(`stats', `pred_metrics') |
| return pred_metrics.metric_agg_help_msg(schema_madlib, message, |
| 'mean_squared_error') |
| $$ language plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.mean_squared_error() |
| RETURNS TEXT AS $$ |
| SELECT MADLIB_SCHEMA.mean_squared_error(''::TEXT); |
| $$ language SQL |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| |
| /* ----------------------------------------------------------------------- */ |
| |
| /** |
| * @brief R2 Score |
| * |
| * @param table_in Name of the input table. |
| * @param table_out Name of the output table. |
| * @param prediction_col Name of the column of predicted values from input table. |
| * @param observed_col Name of the column of observed values from input table. |
| * @param grouping_cols Name of the column of grouping values from input table. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.r2_score( |
| table_in TEXT, |
| table_out TEXT, |
| prediction_col TEXT, |
| observed_col TEXT, |
| grouping_cols TEXT |
| ) RETURNS VOID |
| AS $$ |
| PythonFunctionBodyOnly(`stats', `pred_metrics') |
| return pred_metrics.r2_score( |
| table_in, table_out, prediction_col, observed_col, grouping_cols) |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.r2_score( |
| table_in TEXT, |
| table_out TEXT, |
| prediction_col TEXT, |
| observed_col TEXT |
| ) RETURNS VOID |
| AS $$ |
| SELECT MADLIB_SCHEMA.r2_score($1,$2,$3,$4,NULL) |
| $$ LANGUAGE SQL |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.r2_score(message TEXT) |
| RETURNS TEXT AS $$ |
| PythonFunctionBodyOnly(`stats', `pred_metrics') |
| return pred_metrics.r2_score_help(schema_madlib, message) |
| $$ language plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.r2_score() |
| RETURNS TEXT AS $$ |
| SELECT MADLIB_SCHEMA.r2_score(''::TEXT); |
| $$ language SQL |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| /* ----------------------------------------------------------------------- */ |
| |
| /** |
| * @brief Adjusted R2 Score |
| * |
| * @param table_in Name of the input table. |
| * @param table_out Name of the output table. |
| * @param prediction_col Name of the column of predicted values from input table. |
| * @param observed_col Name of the column of observed values from input table. |
| * @param num_predictors The number of parameters in the predicting model, not |
| * counting the constant term. |
| * @param training_size The number of rows used for training, excluding any NULL |
| * rows. |
| * @param grouping_cols Name of the column of grouping values from input table. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.adjusted_r2_score( |
| table_in TEXT, |
| table_out TEXT, |
| prediction_col TEXT, |
| observed_col TEXT, |
| num_predictors INTEGER, |
| training_size INTEGER, |
| grouping_cols TEXT |
| ) RETURNS VOID |
| AS $$ |
| PythonFunctionBodyOnly(`stats', `pred_metrics') |
| return pred_metrics.adjusted_r2_score( |
| table_in, table_out, prediction_col, observed_col, |
| num_predictors, training_size, grouping_cols) |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.adjusted_r2_score( |
| table_in TEXT, |
| table_out TEXT, |
| prediction_col TEXT, |
| observed_col TEXT, |
| num_predictors INTEGER, |
| training_size INTEGER |
| ) RETURNS VOID |
| AS $$ |
| SELECT MADLIB_SCHEMA.adjusted_r2_score($1,$2,$3,$4,$5,$6,NULL) |
| $$ LANGUAGE SQL |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.adjusted_r2_score(message TEXT) |
| RETURNS TEXT AS $$ |
| PythonFunctionBodyOnly(`stats', `pred_metrics') |
| return pred_metrics.adjusted_r2_score_help(schema_madlib, message) |
| $$ language plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.adjusted_r2_score() |
| RETURNS TEXT AS $$ |
| SELECT MADLIB_SCHEMA.adjusted_r2_score(''::TEXT); |
| $$ language SQL |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| /* ----------------------------------------------------------------------- */ |
| |
| /** |
| * @brief Binary Classifier |
| * |
| * @param table_in Name of the input table. |
| * @param table_out Name of the output table. |
| * @param prediction_col Name of the column of predicted values from input table. |
| * @param observed_col Name of the column of observed values from input table. |
| * @param grouping_cols Name of the column of grouping values from input table. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.binary_classifier( |
| table_in TEXT, |
| table_out TEXT, |
| prediction_col TEXT, |
| observed_col TEXT, |
| grouping_cols TEXT |
| ) RETURNS VOID |
| AS $$ |
| PythonFunctionBodyOnly(`stats', `pred_metrics') |
| return pred_metrics.binary_classifier( |
| table_in, table_out, prediction_col, observed_col, grouping_cols) |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.binary_classifier( |
| table_in TEXT, |
| table_out TEXT, |
| prediction_col TEXT, |
| observed_col TEXT |
| ) RETURNS VOID |
| AS $$ |
| SELECT MADLIB_SCHEMA.binary_classifier($1,$2,$3,$4,NULL) |
| $$ LANGUAGE SQL |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.binary_classifier(message TEXT) |
| RETURNS TEXT AS $$ |
| PythonFunctionBodyOnly(`stats', `pred_metrics') |
| return pred_metrics.binary_classifier_help(schema_madlib, message) |
| $$ language plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.binary_classifier() |
| RETURNS TEXT AS $$ |
| SELECT MADLIB_SCHEMA.binary_classifier(''::TEXT); |
| $$ language SQL |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| /* ----------------------------------------------------------------------- */ |
| |
| /** |
| * @brief Area under ROC Curve |
| * |
| * @param table_in Name of the input table. |
| * @param table_out Name of the output table. |
| * @param prediction_col Name of the column of predicted values from input table. |
| * @param observed_col Name of the column of observed values from input table. |
| * @param grouping_cols Name of the column of grouping values from input table. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.area_under_roc( |
| table_in TEXT, |
| table_out TEXT, |
| prediction_col TEXT, |
| observed_col TEXT, |
| grouping_cols TEXT |
| ) RETURNS VOID |
| AS $$ |
| PythonFunctionBodyOnly(`stats', `pred_metrics') |
| return pred_metrics.area_under_roc( |
| table_in, table_out, prediction_col, observed_col, grouping_cols) |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.area_under_roc( |
| table_in TEXT, |
| table_out TEXT, |
| prediction_col TEXT, |
| observed_col TEXT |
| ) RETURNS VOID |
| AS $$ |
| SELECT MADLIB_SCHEMA.area_under_roc($1,$2,$3,$4,NULL) |
| $$ LANGUAGE SQL |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.area_under_roc(message TEXT) |
| RETURNS TEXT AS $$ |
| PythonFunctionBodyOnly(`stats', `pred_metrics') |
| return pred_metrics.area_under_roc_help(schema_madlib, message) |
| $$ language plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.area_under_roc() |
| RETURNS TEXT AS $$ |
| SELECT MADLIB_SCHEMA.area_under_roc(''::TEXT); |
| $$ language SQL |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| /* ----------------------------------------------------------------------- */ |
| |
| /** |
| * @brief Confusion Matrix |
| * |
| * @param table_in Name of the input table. |
| * @param table_out Name of the output table. |
| * @param prediction_col Name of the column of predicted values from input table. |
| * @param observed_col Name of the column of observed values from input table. |
| * @param grouping_cols Name of the column of grouping values from input table. |
| * |
| */ |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.confusion_matrix( |
| table_in TEXT, |
| table_out TEXT, |
| prediction_col TEXT, |
| observed_col TEXT |
| ) RETURNS VOID |
| AS $$ |
| PythonFunctionBodyOnly(`stats', `pred_metrics') |
| return pred_metrics.confusion_matrix( |
| table_in, table_out, prediction_col, observed_col) |
| $$ LANGUAGE plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.confusion_matrix(message TEXT) |
| RETURNS TEXT AS $$ |
| PythonFunctionBodyOnly(`stats', `pred_metrics') |
| return pred_metrics.confusion_matrix_help(schema_madlib, message) |
| $$ language plpythonu |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.confusion_matrix() |
| RETURNS TEXT AS $$ |
| SELECT MADLIB_SCHEMA.confusion_matrix(''::TEXT); |
| $$ language SQL |
| m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `'); |
| |
| /* ----------------------------------------------------------------------- */ |