blob: 8b294aacf8954a6d49fb158deefc47c7601c7035 [file] [log] [blame]
/* -----------------------------------------------------------------------------
* Cox proportional hazard survival regression
*
* Both Example taken from:
* http://www.sph.emory.edu/~cdckms/CoxPH/prophaz2.html
* -------------------------------------------------------------------------- */
m4_include(`SQLCommon.m4')
m4_changequote(<!,!>)
m4_ifdef(<!__HAS_ORDERED_AGGREGATES__!>,<!
DROP TABLE IF EXISTS leukemia;
CREATE TABLE leukemia (
id INTEGER NOT NULL,
grp DOUBLE PRECISION,
wbc DOUBLE PRECISION,
timedeath INTEGER,
status BOOLEAN
)m4_ifdef(<!__POSTGRESQL__!>, <!!>, <!DISTRIBUTED BY (id)!>);
INSERT INTO leukemia(id, grp, wbc, timedeath,status) VALUES
(0,0,1.45,35,TRUE),
(1,0,1.47,34,TRUE),
(3,0,2.2,32,TRUE),
(4,0,1.78,25,TRUE),
(5,0,2.57,23,TRUE),
(6,0,2.32,22,TRUE),
(7,0,2.01,20,TRUE),
(8,0,2.05,19,TRUE),
(9,0,2.16,17,TRUE),
(10,0,3.6,16,TRUE),
(11,1,2.3,15,TRUE),
(12,0,2.88,13,TRUE),
(13,1,1.5,12,TRUE),
(14,0,2.6,11,TRUE),
(15,0,2.7,10,TRUE),
(16,0,2.8,9,TRUE),
(17,1,2.32,8,TRUE),
(18,0,4.43,7,TRUE),
(19,0,2.31,6,TRUE),
(20,1,3.49,5,TRUE),
(21,1,2.42,4,TRUE),
(22,1,4.01,3,TRUE),
(23,1,4.91,2,TRUE),
(24,1,5,1,TRUE),
(25,NULL,4.01,3,TRUE),
(26,1,NULL,2,TRUE),
(27,1,NULL,2,NULL),
(28,1,5,NULL,TRUE);
drop table if exists coxph_out;
drop table if exists coxph_out_summary;
select coxph_train('leukemia', 'coxph_out', 'timedeath', 'ARRAY[grp, wbc]', 'status', NULL, NULL);
SELECT assert(
relative_error(coef, ARRAY[2.5444, 1.6718]) < 1e-2 AND
relative_error(loglikelihood, -37.8532) < 1e-2 AND
relative_error(std_err, ARRAY[0.6773,0.3873]) < 1e-2 AND
relative_error(z_stats, ARRAY[3.7567,4.3165]) < 1e-2 AND
relative_error(p_values, ARRAY[0.000172,0.00001584]) < 1e-2,
'Cox-Proportional hazards (unique time of death): Wrong results'
) FROM coxph_out;
DROP TABLE IF EXISTS tmp_out;
SELECT cox_prop_hazards('leukemia', 'tmp_out', 'timedeath', 'ARRAY[grp, wbc]', 'status');
SELECT * from tmp_out;
------------------------------------------------------------------------------
-- Testing an all NULL input ---------------------------------------------------
DROP TABLE IF EXISTS cox_null_table;
CREATE TABLE cox_null_table (
id INTEGER NOT NULL,
indep DOUBLE PRECISION [],
dep DOUBLE PRECISION,
status BOOLEAN
);
INSERT INTO cox_null_table(id, indep, dep,status) VALUES
(1, ARRAY[NULL, NULL]::FLOAT8[], 4, TRUE),
(2, ARRAY[NULL, NULL]::FLOAT8[], 4, TRUE),
(3, ARRAY[1, 2]::FLOAT8[], NULL, TRUE),
(4, ARRAY[NULL, NULL]::FLOAT8[], NULL, NULL),
(5, ARRAY[NULL, NULL]::FLOAT8[], 4, NULL),
(6, ARRAY[NULL, NULL]::FLOAT8[], 4, TRUE),
(7, ARRAY[NULL, NULL]::FLOAT8[], 4, NULL);
DROP TABLE IF EXISTS tmp_out;
DROP TABLE IF EXISTS tmp_out_summary;
SELECT coxph_train('cox_null_table', 'tmp_out', 'dep', 'indep', 'status');
SELECT * from tmp_out;
SELECT * from tmp_out_summary;
----------------------------------------------------------------------
DROP TABLE IF EXISTS leukemia;
CREATE TABLE leukemia (
id INTEGER NOT NULL,
grp DOUBLE PRECISION,
wbc DOUBLE PRECISION,
timedeath INTEGER,
status BOOLEAN
)m4_ifdef(<!__POSTGRESQL__!>, <!!>, <!DISTRIBUTED BY (id)!>);
INSERT INTO leukemia(id, grp, wbc, timedeath, status) VALUES
(1,0,1.45,35,TRUE),
(2,0,1.47,34,TRUE),
(3,0,2.2,32,TRUE),
(4,0,2.53,32,TRUE),
(5,0,1.78,25,TRUE),
(6,0,2.57,23,TRUE),
(7,0,2.32,22,TRUE),
(8,0,2.01,20,TRUE),
(9,0,2.05,19,TRUE),
(10,0,2.16,17,TRUE),
(11,0,3.6,16,TRUE),
(12,0,2.88,13,TRUE),
(13,0,2.6,11,TRUE),
(14,0,2.7,10,TRUE),
(15,0,2.96,10,TRUE),
(16,0,2.8,9,TRUE),
(17,0,4.43,7,TRUE),
(18,0,3.2,6,TRUE),
(19,0,2.31,6,TRUE),
(20,0,4.06,6,TRUE),
(21,0,3.28,6,TRUE),
(22,1,1.97,23,TRUE),
(23,1,2.73,22,TRUE),
(24,1,2.95,17,TRUE),
(25,1,2.3,15,TRUE),
(26,1,1.5,12,TRUE),
(27,1,3.06,12,TRUE),
(28,1,3.49,11,TRUE),
(29,1,2.12,11,TRUE),
(30,1,3.52,8,TRUE),
(31,1,3.05,8,TRUE),
(32,1,2.32,8,TRUE),
(33,1,3.26,8,TRUE),
(34,1,3.49,5,TRUE),
(35,1,3.97,5,TRUE),
(36,1,4.36,4,TRUE),
(37,1,2.42,4,TRUE),
(38,1,4.01,3,TRUE),
(39,1,4.91,2,TRUE),
(40,1,4.48,2,TRUE),
(41,1,2.8,1,TRUE),
(42,1,5,1,TRUE);
drop table if exists coxph_out;
drop table if exists coxph_out_summary;
select coxph_train('leukemia', 'coxph_out', 'timedeath', 'ARRAY[grp, wbc]', 'status', NULL, NULL);
-- Values computed with Madlib
SELECT assert(
relative_error(coef, ARRAY[0.6987, 1.4148]) < 1e-2 AND
relative_error(loglikelihood, -100.5537) < 1e-2 AND
relative_error(std_err, ARRAY[0.3483,0.2743]) < 1e-2 AND
relative_error(z_stats, ARRAY[2.0057,5.1563]) < 1e-2 AND
relative_error(p_values, ARRAY[0.0448,0.000000025185]) < 1e-2,
'Cox-Proportional hazards (tied times of death): Wrong results'
) FROM coxph_out;
-- Values computed with Madlib
SELECT assert(
relative_error(coef, ARRAY[0.6987, 1.4148]) < 1e-2 AND
relative_error(loglikelihood, -100.5537) < 1e-2 AND
relative_error(std_err, ARRAY[0.3483,0.2743]) < 1e-2 AND
relative_error(z_stats, ARRAY[2.0057,5.1563]) < 1e-2 AND
relative_error(p_values, ARRAY[0.0448,0.000000025185]) < 1e-2,
'Cox-Proportional hazards (tied times of death): Wrong results'
) FROM (
SELECT (cox_prop_hazards_regr(
'leukemia', 'ARRAY[grp, wbc]', 'timedeath', 'status', 20, 'newton', 0.001
)).*
) q;
----------------------------------------------------------------------
create table rossi (
week integer,
fin integer,
prio integer,
arrest integer,
age_cat integer);
copy rossi from STDIN;
17 0 8 1 1
25 0 13 1 1
52 1 1 0 2
20 0 3 1 3
52 0 2 0 2
23 0 0 1 2
52 1 4 0 2
52 0 3 0 1
52 0 0 0 2
52 1 3 0 3
52 0 2 0 4
52 0 6 0 2
52 0 2 0 4
25 0 3 1 2
46 1 2 1 2
37 0 5 1 1
52 0 12 0 2
52 0 2 0 2
28 0 7 1 1
52 0 1 0 2
24 1 2 1 3
52 1 0 0 4
52 0 4 0 2
52 1 1 0 3
52 0 1 0 4
52 1 2 0 1
52 1 0 0 2
52 0 2 0 1
52 1 3 0 2
52 1 9 0 2
52 1 3 0 1
52 1 1 0 1
52 0 3 0 2
52 0 2 0 2
52 1 0 0 4
50 1 2 1 2
52 1 4 0 1
52 0 2 0 3
52 0 5 0 2
52 0 3 0 3
10 0 14 1 2
52 0 1 0 2
52 1 9 0 4
52 0 2 0 2
52 1 15 0 2
52 1 2 0 2
52 1 2 0 1
20 1 5 1 2
52 1 0 0 4
52 1 0 0 3
52 0 2 0 4
52 1 1 0 2
52 0 1 0 3
50 1 10 1 1
52 1 1 0 4
52 0 3 0 4
52 1 0 0 2
52 0 1 0 3
52 0 1 0 4
52 1 9 0 4
52 0 5 0 2
52 0 2 0 2
6 0 6 1 1
52 0 11 0 4
52 1 1 0 2
52 1 3 0 3
52 0 2 1 2
52 1 2 0 2
52 1 5 0 2
52 0 4 0 2
49 0 3 1 4
52 0 0 0 1
52 0 2 0 2
52 0 4 0 3
52 1 2 0 3
52 1 1 0 4
52 0 2 0 2
52 0 2 0 4
43 0 4 1 2
5 0 3 1 1
52 1 10 0 2
52 0 3 0 3
27 0 4 1 3
52 0 7 0 2
52 0 3 0 2
52 1 1 0 2
22 1 10 1 1
18 0 4 1 2
52 0 4 0 4
52 1 2 0 2
52 1 4 0 2
52 0 1 0 2
52 0 2 0 2
52 1 1 0 2
52 1 2 0 2
24 1 4 1 2
52 1 2 0 2
52 1 3 0 1
52 1 1 0 2
52 1 1 0 2
52 0 3 0 2
26 0 2 1 4
2 0 2 1 4
49 1 1 1 1
48 0 6 1 1
21 0 0 1 3
52 0 1 0 2
52 1 1 0 2
52 0 6 0 2
52 0 3 0 2
52 0 1 0 2
52 1 3 0 2
52 0 0 0 2
52 1 1 0 3
52 0 2 0 2
52 0 1 0 2
8 1 1 1 4
52 1 4 0 1
52 0 3 0 2
52 0 1 0 2
52 1 2 0 2
49 0 1 1 2
52 1 6 0 2
52 1 14 0 2
52 1 1 0 3
52 0 3 0 3
52 0 2 0 4
52 0 8 0 2
8 0 5 1 2
52 0 4 0 2
52 1 2 0 2
52 0 2 0 3
52 1 2 0 2
13 0 5 1 2
52 1 2 0 1
52 1 3 0 2
8 1 11 1 2
52 1 4 0 3
52 1 4 0 2
52 1 5 0 3
52 1 0 0 1
52 0 8 0 2
33 0 10 1 1
11 1 2 1 1
52 1 4 0 3
52 0 4 0 3
52 1 1 0 2
52 1 0 0 1
52 1 1 0 3
37 0 2 1 4
52 0 3 0 2
52 1 2 0 4
52 0 1 0 4
44 0 1 1 2
52 0 0 0 2
52 0 1 1 2
52 1 4 0 4
52 1 1 0 4
52 1 1 0 2
52 1 10 0 4
52 1 8 0 4
52 0 1 0 2
52 1 8 0 2
52 1 2 0 4
52 0 6 0 1
52 1 2 0 2
52 1 3 0 3
52 1 4 0 2
52 1 5 0 3
52 1 3 0 4
52 0 3 0 1
52 1 11 0 2
52 1 2 0 2
17 0 8 1 2
9 1 3 1 3
52 0 10 0 1
52 1 2 0 1
52 1 6 0 2
52 1 1 0 2
52 1 10 0 2
52 1 2 0 4
52 1 7 0 2
16 0 3 1 4
3 0 3 1 3
52 0 1 0 2
52 1 2 0 2
52 0 0 0 4
52 1 1 0 4
52 0 2 0 2
52 0 3 0 4
52 1 13 0 2
52 1 1 0 2
45 0 5 1 2
52 1 2 0 2
52 1 1 0 2
52 1 0 0 2
52 0 1 0 1
52 0 1 0 2
52 1 2 0 2
52 0 2 0 2
52 1 4 0 1
28 0 1 1 2
52 0 4 0 2
16 1 5 1 3
52 0 0 0 2
52 0 1 0 1
15 1 4 1 1
52 1 1 0 1
52 1 1 0 2
14 0 0 1 2
52 1 2 0 2
52 1 1 0 3
52 1 1 0 4
52 0 4 0 3
52 0 4 0 3
52 1 7 0 2
52 0 1 0 3
52 0 4 0 2
52 1 2 0 2
52 0 2 0 3
52 0 3 0 2
52 1 1 0 2
52 0 3 0 4
52 1 2 0 2
52 1 3 0 2
7 1 2 1 2
52 1 1 0 2
52 1 2 0 3
40 1 6 1 2
46 1 1 1 2
43 0 3 1 1
52 1 5 0 2
52 1 1 0 2
52 0 11 0 2
14 0 7 1 2
8 0 4 1 3
52 0 1 0 2
52 0 1 0 2
52 0 2 0 2
52 0 3 0 3
52 0 1 0 1
25 0 18 1 3
52 0 2 0 2
52 0 1 0 2
52 0 8 0 2
37 1 1 1 2
17 0 5 1 2
52 1 2 0 2
52 0 1 0 3
32 0 3 1 1
52 1 1 0 2
52 0 2 0 2
52 1 5 0 4
52 1 8 0 2
52 1 4 0 2
52 1 2 0 4
12 1 0 1 3
52 0 1 0 3
52 0 0 0 4
52 0 3 0 3
52 0 5 0 2
52 0 1 0 2
18 0 4 1 2
14 1 12 1 1
52 1 1 0 4
52 0 3 0 1
52 0 1 0 2
52 1 2 0 2
24 0 2 1 4
52 1 1 0 4
38 0 2 1 2
20 1 1 1 2
52 1 2 0 2
32 1 0 1 1
52 0 4 0 1
52 1 0 0 3
52 1 4 0 2
52 1 1 0 2
52 1 2 0 3
52 1 2 0 2
52 1 1 0 4
52 1 5 0 2
52 0 0 0 2
52 1 2 0 4
52 1 2 0 4
31 0 5 1 1
20 1 1 1 2
40 0 3 1 1
42 1 1 1 3
52 1 2 0 4
52 0 0 0 2
52 0 2 0 4
52 0 2 0 2
52 0 9 0 2
26 0 1 1 3
52 1 5 0 2
47 0 3 1 2
52 1 3 0 2
52 1 2 0 2
52 1 2 0 2
52 0 2 0 2
52 0 1 0 1
52 0 2 0 2
40 0 1 1 2
21 0 3 1 3
52 1 2 0 3
52 0 1 0 4
52 0 5 0 2
52 1 6 0 4
52 0 3 0 1
52 1 2 0 2
52 1 0 0 2
24 0 1 1 2
52 1 4 0 1
1 0 0 1 2
43 0 3 1 2
52 1 6 0 1
11 0 18 1 1
52 0 3 0 1
52 1 2 0 4
46 1 5 1 2
52 0 1 0 2
33 0 3 1 2
52 0 1 0 2
18 1 4 1 1
36 1 3 1 1
52 1 1 0 2
52 1 0 0 2
52 1 2 0 2
52 1 1 0 2
52 1 5 0 4
50 0 8 1 2
52 0 1 0 2
34 1 11 1 2
52 1 4 0 2
35 1 1 1 1
52 0 2 0 3
52 1 3 0 4
39 0 4 1 2
9 1 0 1 3
52 0 1 0 2
52 1 1 0 4
52 0 1 0 3
34 1 3 1 1
52 1 1 0 2
52 1 2 0 3
52 1 1 0 4
44 0 2 1 2
39 1 5 1 3
52 0 3 0 2
35 1 3 1 2
30 0 1 1 1
52 0 0 0 4
52 1 1 0 2
52 0 1 0 4
52 0 1 0 3
52 0 1 0 2
19 1 4 1 2
52 0 2 0 2
43 0 10 1 2
52 0 1 0 2
48 1 4 1 2
37 1 11 1 3
20 1 1 1 3
52 0 0 0 3
52 0 1 0 3
36 1 3 1 2
52 1 4 0 3
52 1 5 0 2
52 1 3 0 2
52 0 7 0 1
52 0 4 0 2
52 0 1 0 4
52 1 9 0 2
30 1 2 1 2
52 0 1 0 4
52 1 3 0 3
52 0 1 0 3
52 1 0 0 2
52 0 2 0 2
52 0 6 0 2
52 0 0 0 2
52 1 1 0 3
42 1 0 1 2
26 0 2 1 2
52 0 5 0 2
52 1 2 0 4
52 1 0 0 3
52 1 2 0 2
40 0 2 1 1
52 0 0 0 1
52 0 2 0 2
52 0 3 0 1
35 1 2 1 1
52 0 2 0 2
46 0 2 1 2
49 1 1 1 1
49 1 0 1 1
52 0 0 0 2
52 0 2 0 2
52 0 1 0 2
52 0 2 0 2
52 1 1 0 2
52 0 5 0 2
52 1 1 0 3
52 0 2 0 2
35 0 4 1 2
52 1 4 0 3
52 1 1 0 2
52 1 4 0 4
52 1 4 0 4
27 0 1 1 2
45 1 5 1 1
52 0 1 0 2
52 1 1 0 2
52 0 0 1 2
52 1 1 0 2
4 0 1 1 1
52 0 2 1 4
36 1 2 1 1
52 0 3 0 1
52 1 1 0 2
8 1 4 1 2
15 1 3 1 2
52 1 3 0 4
19 0 2 1 1
52 0 2 0 2
12 1 2 1 2
52 1 1 0 2
52 0 1 0 2
52 1 1 0 2
52 0 3 0 3
\.
drop table if exists coxph_out;
drop table if exists coxph_out_summary;
select coxph_train('rossi', 'coxph_out', 'week', 'ARRAY[fin, prio]', 'arrest', NULL, 'max_iter=100, tolerance=1e-8');
SELECT assert(
relative_error(coef, ARRAY[-0.39782100911,0.10374598626]) < 1e-2 AND
relative_error(loglikelihood, -667.48864632) < 1e-2 AND
relative_error(std_err, ARRAY[0.19004386046,0.026739386277]) < 1e-2 AND
relative_error(z_stats, ARRAY[-2.093311555,3.8798940702]) < 1e-2 AND
relative_error(p_values, ARRAY[0.0363213518100,0.0001045019663]) < 1e-2,
'Cox-Proportional hazards (unique time of death): Wrong results'
) FROM coxph_out;
drop table if exists coxph_out;
drop table if exists coxph_out_summary;
select coxph_train('rossi', 'coxph_out', 'week', 'ARRAY[fin, prio]', 'arrest', 'age_cat', 'max_iter=100, tolerance=1e-8');
SELECT assert(
relative_error(coef, ARRAY[-0.34153528582,0.093981324]) < 1e-2 AND
relative_error(loglikelihood, -522.769302440094) < 1e-2 AND
relative_error(std_err, ARRAY[0.190199471912,0.026988079968]) < 1e-2 AND
relative_error(z_stats, ARRAY[-1.7956689489,3.4823271905]) < 1e-2 AND
relative_error(p_values, ARRAY[0.0725471830500,0.000497075935]) < 1e-2,
'Cox-Proportional hazards (unique time of death): Wrong results'
) FROM coxph_out;
-- prediction --
CREATE TABLE sample_data (
id integer NOT NULL,
grp double precision,
wbc double precision,
timedeath integer,
status boolean,
num integer NOT NULL
);
COPY sample_data (id, grp, wbc, timedeath, status, num)
FROM stdin
WITH DELIMITER AS ',';
1, 0, 1.47, 34, t, 1
0, 0, 1.45, 35, t, 1
3, 0, 2.2000000000000002, 32, t, 1
4, 0, 1.78, 25, t, 1
5, 0, 2.5699999999999998, 23, t, 1
6, 0, 2.3199999999999998, 22, t, 1
7, 0, 2.0099999999999998, 20, t, 1
8, 0, 2.0499999999999998, 19, t, 1
9, 0, 2.1600000000000001, 17, t, 1
10, 0, 3.6000000000000001, 16, t, 1
15, 0, 2.7000000000000002, 10, t, 1
12, 0, 2.8799999999999999, 13, t, 1
19, 0, 2.3100000000000001, 6, t, 1
14, 0, 2.6000000000000001, 11, t, 1
11, 1, 2.2999999999999998, 15, t, 0
16, 0, 2.7999999999999998, 9, t, 1
13, 1, 1.5, 12, t, 0
18, 0, 4.4299999999999997, 7, t, 1
17, 1, 2.3199999999999998, 8, t, 0
20, 1, 3.4900000000000002, 5, t, 0
21, 1, 2.4199999999999999, 4, t, 0
22, 1, 4.0099999999999998, 3, t, 0
23, 1, 4.9100000000000001, 2, t, 0
24, 1, 5, 1, t, 0
\.
drop table if exists sample_cox, sample_cox_summary;
SELECT coxph_train('sample_data', 'sample_cox', 'timedeath', 'ARRAY[wbc]', 'status', 'grp,num');
drop table if exists coxph_pred_out;
SELECT coxph_predict('sample_cox', 'sample_data', 'id', 'coxph_pred_out', 'linear_predictors', 'overall');
drop table if exists coxph_pred_out;
SELECT coxph_predict('sample_cox', 'sample_data', 'id', 'coxph_pred_out', 'risk', 'strata');
drop table if exists coxph_pred_out;
SELECT coxph_predict('sample_cox', 'sample_data', 'id', 'coxph_pred_out', 'terms', 'strata');
SELECT * from coxph_pred_out;
SELECT array_agg(terms[1] order by id) FROM coxph_pred_out;
SELECT assert(relative_error(array_agg(terms[1] order by id),
ARRAY[-1.95694557008923,-1.92612752961539,-0.80126905232,-1.44844790227077,
-0.231135303553847,-0.616360809476924,-1.09404043682154,-1.03240435587385,
-0.862905133267692,1.35599378084923,-0.64717884995077,0.246544323790769,
-1.87990046890462,-0.184908242843077,-0.0308180404738462,0.123272161895384,
-0.616360809476924,2.63494246051385,-0.631769829713846,1.18649455824308,
-0.462270607107693,1.98776361056308,3.37457543188615,3.5132566140184]) < .4,
'prediction errorr while predicting terms') FROM coxph_pred_out;
!>)
m4_changequote(<!`!>,<!'!>)