| m4_include(`SQLCommon.m4') |
| |
| /* ----------------------------------------------------------------------------- |
| * Test Linear Regression. |
| * -------------------------------------------------------------------------- */ |
| |
| /* |
| * The following example is taken from: |
| * http://www.weibull.com/DOEWeb/estimating_regression_models_using_least_squares.htm#Example%205.1 |
| * http://www.weibull.com/DOEWeb/hypothesis_tests_in_multiple_linear_regression.htm#Example%205.3 |
| */ |
| DROP TABLE IF EXISTS weibull; |
| CREATE TABLE weibull ( |
| id INTEGER NOT NULL, |
| x1 DOUBLE PRECISION, |
| x2 DOUBLE PRECISION, |
| y DOUBLE PRECISION |
| ) m4_ifdef(`__POSTGRESQL__', `', `DISTRIBUTED BY (id)'); |
| |
| /* |
| * We materialize the table here because on MPP systems we want to distribute |
| * the data to the segments (and not do the calculations in memory). |
| */ |
| INSERT INTO weibull(id, x1, x2, y) VALUES |
| ( 1, 41.9, 29.1, 251.3), |
| ( 2, 43.4, 29.3, 251.3), |
| ( 3, 43.9, 29.5, 248.3), |
| ( 4, 44.5, 29.7, 267.5), |
| ( 5, 47.3, 29.9, 273.0), |
| ( 6, 47.5, 30.3, 276.5), |
| ( 7, 47.9, 30.5, 270.3), |
| ( 8, 50.2, 30.7, 274.9), |
| ( 9, 52.8, 30.8, 285.0), |
| (10, 53.2, 30.9, 290.0), |
| (11, 56.7, 31.5, 297.0), |
| (12, 57.0, 31.7, 302.5), |
| (13, 63.5, 31.9, 304.5), |
| (14, 65.3, 32.0, 309.3), |
| (15, 71.1, 32.1, 321.7), |
| (16, 77.0, 32.5, 330.7), |
| (17, 77.8, 32.9, 349.0); |
| |
| |
| --------------------- Robust linear regression install checks ----------- |
| |
| SELECT * FROM ( |
| SELECT (robust_linregr(y, ARRAY[1, x1, x2], coef)).* |
| FROM |
| ( SELECT (linregr(y, ARRAY[1, x1, x2])).coef |
| FROM weibull |
| ) as mle_coef, |
| weibull as src |
| ) q; |
| DROP TABLE IF EXISTS result_weibull_3; |
| DROP TABLE IF EXISTS result_weibull_3_summary; |
| select robust_variance_linregr('weibull', 'result_weibull_3', 'y', 'ARRAY[1, x1, x2]'); |
| -- End of robust linear regression |
| |
| |
| /* Robust variance test |
| * The following example is uses the coefficients computed in the above |
| * example, and checks that the robust variance is correct. |
| * The correct values were checked with R. For completeness, the following |
| * R code segment can be used to reproduce the correct values. This code segment |
| * expects the data table 'patients' to be in a file called "patients.txt". |
| * the data file should look exactly like: |
| second_attack, treatment, trait_anxiety |
| 1, 1, 70 |
| 1, 1, 80 |
| 1, 1, 50 |
| 1, 0, 60 |
| 1, 0, 40 |
| 1, 0, 65 |
| 1, 0, 75 |
| 1, 0, 80 |
| 1, 0, 70 |
| 1, 0, 60 |
| 0, 1, 65 |
| 0, 1, 50 |
| 0, 1, 45 |
| 0, 1, 35 |
| 0, 1, 40 |
| 0, 1, 50 |
| 0, 0, 55 |
| 0, 0, 45 |
| 0, 0, 50 |
| 0, 0, 60 |
| * BEGIN CODE: |
| data <- read.csv("patients.txt") |
| regress <- glm( second_attack ~ treatment + trait_anxiety, family=binomial("logit"), data = data) |
| library(sandwich) |
| vcovHC(regress, 'HC0') |
| * END CODE |
| */ |
| |
| |
| DROP TABLE IF EXISTS patients; |
| |
| CREATE TABLE patients ( |
| id INTEGER NOT NULL, |
| second_attack INTEGER, |
| treatment INTEGER, |
| trait_anxiety INTEGER |
| ) m4_ifdef(`__POSTGRESQL__', `', `DISTRIBUTED BY (id)'); |
| INSERT INTO patients(ID, second_attack, treatment, trait_anxiety) VALUES |
| ( 1, 1, 1, 70), |
| ( 2, 1, 1, 80), |
| ( 3, 1, 1, 50), |
| ( 4, 1, 0, 60), |
| ( 5, 1, 0, 40), |
| ( 6, 1, 0, 65), |
| ( 7, 1, 0, 75), |
| ( 8, 1, 0, 80), |
| ( 9, 1, 0, 70), |
| (10, 1, 0, 60), |
| (11, 0, 1, 65), |
| (12, 0, 1, 50), |
| (13, 0, 1, 45), |
| (14, 0, 1, 35), |
| (15, 0, 1, 40), |
| (16, 0, 1, 50), |
| (17, 0, 0, 55), |
| (18, 0, 0, 45), |
| (19, 0, 0, 50), |
| (20, 0, 0, 60); |
| |
| |
| drop table if exists temp_result; |
| drop table if exists temp_result_summary; |
| select robust_variance_logregr('patients', 'temp_result', 'second_attack', 'ARRAY[1, treatment, trait_anxiety]'); |
| |
| SELECT assert( |
| relative_error(coef, ARRAY[-6.36346, -1.024, 0.11904]) < 1e-3 AND |
| relative_error(std_err, ARRAY[3.4587,1.171,0.05343]) < 1e-3 AND |
| relative_error(z_stats, ARRAY[-1.8398,-0.87409,2.2279]) < 1e-3 AND |
| relative_error(p_values, ARRAY[0.06579,0.38206,0.025884]) < 1e-3, |
| 'Robust Variance: Wrong Results' |
| )FROM temp_result; |
| |
| drop table if exists temp_result; |
| drop table if exists temp_result_summary; |
| select robust_variance_logregr('patients', 'temp_result', 'second_attack', 'ARRAY[1, treatment, trait_anxiety]', NULL ); |
| |
| drop table if exists temp_result; |
| drop table if exists temp_result_summary; |
| select robust_variance_logregr('patients', 'temp_result', 'second_attack', 'ARRAY[1, treatment, trait_anxiety]', NULL, 20 ); |
| |
| drop table if exists temp_result; |
| drop table if exists temp_result_summary; |
| select robust_variance_logregr('patients', 'temp_result', 'second_attack', 'ARRAY[1, treatment, trait_anxiety]', NULL, 20 , 'irls'); |
| |
| drop table if exists temp_result; |
| drop table if exists temp_result_summary; |
| select robust_variance_logregr('patients', 'temp_result', 'second_attack', 'ARRAY[1, treatment, trait_anxiety]', NULL, 20, 'irls', 0.0001 ); |
| |
| drop table if exists temp_result; |
| drop table if exists temp_result_summary; |
| select robust_variance_logregr('patients', 'temp_result', 'second_attack', 'ARRAY[1, treatment, trait_anxiety]', NULL, 20, 'irls', 0.0001, FALSE ); |
| |
| ------------------------------------------------------------------------- |
| |
| DROP TABLE IF EXISTS test_hw_mlogr; |
| CREATE TABLE test_hw_mlogr( |
| feat1 INTEGER, |
| feat2 DOUBLE PRECISION, |
| cat INTEGER |
| ); |
| |
| INSERT INTO test_hw_mlogr(feat1, feat2, cat) VALUES |
| (1,0.35,1), |
| (2,0.33,0), |
| (3,0.39,1), |
| (1,0.37,1), |
| (2,0.31,1), |
| (3,0.36,0), |
| (2,0.36,1), |
| (2,0.31,1), |
| (2,0.41,1), |
| (2,0.37,1), |
| (1,0.44,1), |
| (3,0.33,2), |
| (1,0.31,1), |
| (2,0.44,1), |
| (1,0.35,1), |
| (1,0.44,0), |
| (1,0.46,0), |
| (2,0.46,1), |
| (2,0.46,2), |
| (3,0.49,1), |
| (2,0.39,0), |
| (2,0.44,1), |
| (1,0.47,1), |
| (1,0.44,1), |
| (1,0.37,2), |
| (3,0.38,2), |
| (1,0.49,0), |
| (2,0.44,0), |
| (1,0.41,2), |
| (1,0.5,2), |
| (2,0.44,0), |
| (1,0.39,1), |
| (1,0.4,2), |
| (1,0.46,2), |
| (2,0.41,1), |
| (2,0.39,1), |
| (2,0.33,1), |
| (3,0.59,2), |
| (1,0.41,0), |
| (2,0.47,2), |
| (2,0.31,0), |
| (3,0.42,2), |
| (1,0.55,2), |
| (3,0.4,1), |
| (1,0.44,2), |
| (1,0.54,1), |
| (2,0.46,1), |
| (1,0.54,0), |
| (2,0.42,1), |
| (2,0.49,2), |
| (2,0.41,2), |
| (2,0.41,1), |
| (1,0.44,0), |
| (1,0.57,2), |
| (2,0.52,2), |
| (1,0.49,0), |
| (3,0.41,2), |
| (3,0.57,0), |
| (1,0.62,1), |
| (3,0.33,0), |
| (2,0.54,1), |
| (2,0.4,2), |
| (3,0.52,2), |
| (2,0.57,1), |
| (2,0.49,1), |
| (2,0.46,1), |
| (1,0.57,0), |
| (2,0.49,2), |
| (2,0.52,2), |
| (2,0.53,0), |
| (3,0.54,2), |
| (2,0.57,2), |
| (3,0.41,2), |
| (1,0.52,0), |
| (2,0.57,1), |
| (1,0.54,0), |
| (2,0.52,1), |
| (2,0.52,0), |
| (2,0.44,0), |
| (2,0.46,2), |
| (1,0.49,1), |
| (2,0.54,2), |
| (3,0.52,2), |
| (1,0.44,0), |
| (3,0.49,1), |
| (1,0.46,2), |
| (2,0.54,0), |
| (2,0.39,0), |
| (2,0.59,0), |
| (2,0.45,1), |
| (3,0.52,1), |
| (3,0.54,0), |
| (3,0.44,1), |
| (2,0.5,2), |
| (2,0.62,1), |
| (2,0.59,0), |
| (2,0.52,2), |
| (2,0.52,1), |
| (2,0.46,1), |
| (2,0.41,0), |
| (2,0.52,2), |
| (2,0.52,1), |
| (2,0.55,1), |
| (2,0.41,1), |
| (2,0.49,0), |
| (1,0.59,2), |
| (1,0.54,0), |
| (2,0.54,0), |
| (2,0.59,2), |
| (2,0.55,2), |
| (1,0.62,2), |
| (2,0.54,2), |
| (2,0.54,2), |
| (2,0.54,2), |
| (2,0.59,2), |
| (2,0.57,1), |
| (3,0.61,2), |
| (3,0.52,2), |
| (2,0.59,2), |
| (2,0.62,2), |
| (1,0.6,1), |
| (2,0.59,2), |
| (2,0.65,2), |
| (3,0.61,2), |
| (2,0.59,2), |
| (3,0.59,2), |
| (2,0.59,2), |
| (2,0.59,2), |
| (2,0.65,2), |
| (3,0.57,2), |
| (2,0.59,2), |
| (3,0.49,2), |
| (1,0.49,0), |
| (3,0.59,2), |
| (2,0.62,2), |
| (3,0.59,0), |
| (2,0.54,2), |
| (3,0.63,2), |
| (1,0.43,2), |
| (3,0.54,2), |
| (3,0.52,2), |
| (1,0.57,2), |
| (2,0.57,0), |
| (2,0.57,0), |
| (2,0.61,2), |
| (2,0.62,0), |
| (2,0.62,0), |
| (1,0.65,0), |
| (2,0.57,2), |
| (3,0.59,2), |
| (2,0.59,2), |
| (3,0.62,2), |
| (2,0.65,2), |
| (2,0.62,1), |
| (1,0.62,0), |
| (2,0.62,2), |
| (3,0.54,2), |
| (3,0.62,2), |
| (1,0.65,2), |
| (3,0.62,2), |
| (3,0.67,0), |
| (3,0.65,0), |
| (1,0.6,2), |
| (3,0.59,2), |
| (2,0.59,2), |
| (2,0.59,1), |
| (3,0.65,0), |
| (3,0.62,2), |
| (3,0.65,2), |
| (3,0.59,0), |
| (1,0.59,0), |
| (3,0.61,2), |
| (1,0.65,2), |
| (3,0.67,1), |
| (3,0.65,2), |
| (1,0.65,2), |
| (2,0.67,2), |
| (1,0.65,2), |
| (1,0.62,2), |
| (3,0.52,2), |
| (3,0.63,2), |
| (2,0.59,2), |
| (3,0.65,2), |
| (2,0.59,0), |
| (3,0.67,2), |
| (3,0.67,2), |
| (3,0.6,2), |
| (3,0.67,2), |
| (3,0.62,2), |
| (2,0.54,2), |
| (3,0.65,2), |
| (3,0.62,2), |
| (2,0.59,2), |
| (3,0.6,2), |
| (3,0.63,2), |
| (3,0.65,2), |
| (2,0.63,1), |
| (2,0.67,2), |
| (2,0.65,2), |
| (2,0.62,2); |
| |
| |
| -- Checks for the function usage |
| select robust_variance_mlogregr('help'); |
| select robust_variance_mlogregr(''); |
| select robust_variance_mlogregr('random_test'); |
| select robust_variance_mlogregr('usage'); |
| |
| |
| DROP TABLE IF EXISTS test_hw_mlogr_result; |
| DROP TABLE IF EXISTS test_hw_mlogr_result_summary; |
| SELECT robust_variance_mlogregr( |
| 'test_hw_mlogr', 'test_hw_mlogr_result', |
| 'cat', 'ARRAY[1, feat1, feat2]', 0, NULL, 20, 'irls', 0.0001, False); |
| SELECT * FROM test_hw_mlogr_result; |
| SELECT assert(relative_error( |
| std_err, ARRAY[1.20, 0.29, 2.36]) < 1e-1, |
| 'Robust Variance: Wrong Results') |
| from test_hw_mlogr_result |
| where category = 1; |
| SELECT assert(relative_error( |
| std_err, ARRAY[1.28, 0.28, 2.18]) < 1e-1, |
| 'Robust Variance: Wrong Results') |
| from test_hw_mlogr_result |
| where category = 2; |
| |
| DROP TABLE IF EXISTS test_hw_mlogr_result; |
| DROP TABLE IF EXISTS test_hw_mlogr_result_summary; |
| SELECT robust_variance_mlogregr( |
| 'test_hw_mlogr', 'test_hw_mlogr_result', |
| 'cat', 'ARRAY[1, feat1, feat2]', 0, NULL, |
| 'max_iter=20, optimizer=irls, tolerance=0.0001', |
| False); |
| |
| |
| DROP TABLE IF EXISTS test_hw_mlogr_result; |
| DROP TABLE IF EXISTS test_hw_mlogr_result_summary; |
| SELECT robust_variance_mlogregr( |
| 'test_hw_mlogr', 'test_hw_mlogr_result', |
| 'cat', 'ARRAY[1, feat1, feat2]', 0, NULL, |
| 'max_num_iterations=20, optimizer=irls, precision=0.0001', |
| False); |
| |
| -- NULL handling test |
| DROP TABLE IF EXISTS weibull_with_null; |
| DROP TABLE IF EXISTS test_hw_mlogr_result_summary; |
| CREATE TABLE weibull_with_null AS SELECT * FROM weibull; |
| INSERT INTO weibull_with_null(id, x1, x2, y) VALUES |
| (18, NULL, 29.1, 251.3), |
| (19, 43.4, NULL, 251.3), |
| (20, 43.9, 29.5, NULL); |
| |
| DROP TABLE IF EXISTS temp_result; |
| DROP TABLE IF EXISTS temp_result_summary; |
| SELECT robust_variance_linregr('weibull_with_null', 'temp_result', 'y', |
| 'ARRAY[1, x1, x2]'); |
| |
| DROP TABLE IF EXISTS patients_with_null; |
| CREATE TABLE patients_with_null AS SELECT * FROM patients; |
| INSERT INTO patients_with_null(ID, second_attack, treatment, trait_anxiety) VALUES |
| (21, NULL, 1, 70), |
| (22, 1, NULL, 80), |
| (23, 1, 1, NULL); |
| |
| DROP TABLE IF EXISTS temp_result; |
| DROP TABLE IF EXISTS temp_result_summary; |
| SELECT robust_variance_logregr('patients_with_null', 'temp_result', 'second_attack', 'ARRAY[1, treatment, trait_anxiety]'); |
| |
| |
| DROP TABLE IF EXISTS test_hw_mlogr_null_value; |
| CREATE TABLE test_hw_mlogr_null_value AS SELECT * FROM test_hw_mlogr; |
| INSERT INTO test_hw_mlogr(feat1, feat2, cat) VALUES |
| (NULL,0.67,2), |
| (2,NULL,2), |
| (2,0.62,NULL); |
| |
| DROP TABLE IF EXISTS temp_result; |
| DROP TABLE IF EXISTS temp_result_summary; |
| SELECT robust_variance_mlogregr( |
| 'test_hw_mlogr_null_value', 'temp_result', |
| 'cat', 'ARRAY[1, feat1, feat2]', 0, NULL, |
| 'max_iter=20, optimizer=irls, tolerance=0.0001', False); |
| |
| SELECT assert(relative_error( |
| std_err, ARRAY[1.20, 0.29, 2.36]) < 1e-1, |
| 'Robust Variance: Wrong Results') |
| from temp_result |
| where category = 1; |
| SELECT assert(relative_error( |
| std_err, ARRAY[1.28, 0.28, 2.18]) < 1e-1, |
| 'Robust Variance: Wrong Results') |
| from temp_result |
| where category = 2; |