blob: 2998f2203a0a96b1ad692044f9ce194e0fa070f3 [file] [log] [blame]
/* -----------------------------------------------------------------------------
* Test Ordinal Logistic Regression.
* -------------------------------------------------------------------------- */
/*
* The values given by the multinomial logistic regression were cross checked
* with the function clm() in R ordinal package.
*
* example: polr(cat ~ feat1 + feat2, data=dat, link="logit")
*
*/
-- create table for training
DROP TABLE IF EXISTS multinom_test;
CREATE TABLE multinom_test (
feat1 INTEGER,
feat2 INTEGER,
cat INTEGER,
g CHAR
);
INSERT INTO multinom_test(feat1, feat2, cat, g) VALUES
(1,35,1,'A'),
(2,33,0,'A'),
(3,39,1,'A'),
(1,37,1,'A'),
(2,31,1,'A'),
(3,36,0,'A'),
(2,36,1,'A'),
(2,31,1,'A'),
(2,41,1,'A'),
(2,37,1,'A'),
(1,44,1,'A'),
(3,33,2,'A'),
(1,31,1,'A'),
(2,44,1,'A'),
(1,35,1,'A'),
(1,44,0,'A'),
(1,46,0,'A'),
(2,46,1,'A'),
(2,46,2,'A'),
(3,49,1,'A'),
(2,39,0,'A'),
(2,44,1,'A'),
(1,47,1,'A'),
(1,44,1,'A'),
(1,37,2,'A'),
(3,38,2,'A'),
(1,49,0,'A'),
(2,44,0,'A'),
(1,41,2,'A'),
(1,50,2,'A'),
(2,44,0,'A'),
(1,39,1,'A'),
(1,40,2,'A'),
(1,46,2,'A'),
(2,41,1,'A'),
(2,39,1,'A'),
(2,33,1,'A'),
(3,59,2,'A'),
(1,41,0,'A'),
(2,47,2,'A'),
(2,31,0,'A'),
(3,42,2,'A'),
(1,55,2,'A'),
(3,40,1,'A'),
(1,44,2,'A'),
(1,54,1,'A'),
(2,46,1,'A'),
(1,54,0,'A'),
(2,42,1,'A'),
(2,49,2,'A'),
(2,41,2,'A'),
(2,41,1,'A'),
(1,44,0,'A'),
(1,57,2,'A'),
(2,52,2,'A'),
(1,49,0,'A'),
(3,41,2,'A'),
(3,57,0,'A'),
(1,62,1,'A'),
(3,33,0,'A'),
(2,54,1,'A'),
(2,40,2,'A'),
(3,52,2,'A'),
(2,57,1,'A'),
(2,49,1,'A'),
(2,46,1,'A'),
(1,57,0,'A'),
(2,49,2,'A'),
(2,52,2,'A'),
(2,53,0,'A'),
(3,54,2,'A'),
(2,57,2,'A'),
(3,41,2,'A'),
(1,52,0,'A'),
(2,57,1,'A'),
(1,54,0,'A'),
(2,52,1,'A'),
(2,52,0,'A'),
(2,44,0,'A'),
(2,46,2,'A'),
(1,49,1,'A'),
(2,54,2,'A'),
(3,52,2,'A'),
(1,44,0,'A'),
(3,49,1,'A'),
(1,46,2,'A'),
(2,54,0,'A'),
(2,39,0,'A'),
(2,59,0,'A'),
(2,45,1,'A'),
(3,52,1,'A'),
(3,54,0,'A'),
(3,44,1,'A'),
(2,50,2,'A'),
(2,62,1,'A'),
(2,59,0,'A'),
(2,52,2,'A'),
(2,52,1,'A'),
(2,46,1,'A'),
(2,41,0,'A'),
(2,52,2,'A'),
(2,52,1,'A'),
(2,55,1,'A'),
(2,41,1,'A'),
(2,49,0,'A'),
(1,59,2,'A'),
(1,54,0,'A'),
(2,54,0,'A'),
(2,59,2,'A'),
(2,55,2,'A'),
(1,62,2,'A'),
(2,54,2,'A'),
(2,54,2,'A'),
(2,54,2,'A'),
(2,59,2,'A'),
(2,57,1,'A'),
(3,61,2,'A'),
(3,52,2,'A'),
(2,59,2,'A'),
(2,62,2,'A'),
(1,60,1,'A'),
(2,59,2,'A'),
(2,65,2,'A'),
(3,61,2,'A'),
(2,59,2,'A'),
(3,59,2,'A'),
(2,59,2,'A'),
(2,59,2,'A'),
(2,65,2,'A'),
(3,57,2,'A'),
(2,59,2,'A'),
(3,49,2,'A'),
(1,49,0,'A'),
(3,59,2,'A'),
(2,62,2,'A'),
(3,59,0,'A'),
(2,54,2,'A'),
(3,63,2,'A'),
(1,43,2,'A'),
(3,54,2,'A'),
(3,52,2,'A'),
(1,57,2,'A'),
(2,57,0,'A'),
(2,57,0,'A'),
(2,61,2,'A'),
(2,62,0,'A'),
(2,62,0,'A'),
(1,65,0,'A'),
(2,57,2,'A'),
(3,59,2,'A'),
(2,59,2,'A'),
(3,62,2,'A'),
(2,65,2,'B'),
(2,62,1,'B'),
(1,62,0,'B'),
(2,62,2,'B'),
(3,54,2,'B'),
(3,62,2,'B'),
(1,65,2,'B'),
(3,62,2,'B'),
(3,67,0,'B'),
(3,65,0,'B'),
(1,60,2,'B'),
(3,59,2,'B'),
(2,59,2,'B'),
(2,59,1,'B'),
(3,65,0,'B'),
(3,62,2,'B'),
(3,65,2,'B'),
(3,59,0,'B'),
(1,59,0,'B'),
(3,61,2,'B'),
(1,65,2,'B'),
(3,67,1,'B'),
(3,65,2,'B'),
(1,65,2,'B'),
(2,67,2,'B'),
(1,65,2,'B'),
(1,62,2,'B'),
(3,52,2,'B'),
(3,63,2,'B'),
(2,59,2,'B'),
(3,65,2,'B'),
(2,59,0,'B'),
(3,67,2,'B'),
(3,67,2,'B'),
(3,60,2,'B'),
(3,67,2,'B'),
(3,62,2,'B'),
(2,54,2,'B'),
(3,65,2,'B'),
(3,62,2,'B'),
(2,59,2,'B'),
(3,60,2,'B'),
(3,63,2,'B'),
(3,65,2,'B'),
(2,63,1,'B'),
(2,67,2,'B'),
(2,65,2,'B'),
(2,62,2,'B');
-- training function for logit link
SELECT ordinal(
'multinom_test',
'ordinal_out',
'cat',
'ARRAY[feat1, feat2]',
'0<1<2',
'logit'
);
\x on
SELECT * FROM ordinal_out;
SELECT * FROM ordinal_out_summary;
\x off
SELECT assert(
relative_error(coef_threshold, ARRAY[2.6350, 3.8995]) < 1e-4 AND
relative_error(coef_feature, ARRAY[.53958, .05595]) < 1e-4 AND
relative_error(log_likelihood, -190.92) < 1e-4,
'Ordinal regression with IRLS optimizer (test): Wrong results'
)
FROM ordinal_out;
-- create table for prediction
DROP TABLE IF EXISTS multinom_to_prd;
CREATE TABLE multinom_to_prd (
feat1 INTEGER,
feat2 INTEGER,
id INTEGER,
g CHAR
);
INSERT INTO multinom_to_prd(feat1, feat2, id, g) VALUES
(1,35,1,'A'),
(2,33,2,'A'),
(3,39,3,'B'),
(1,37,4,'B');
-- prediction function
SELECT ordinal_predict(
'ordinal_out',
'multinom_to_prd',
'ordinal_prd',
'probability'
);
SELECT assert(
relative_error(prob, ARRAY[0.5342011, 0.4278446, 0.2375883, 0.5062776])< 1e-4,
'Ordinal regression prediction: Wrong results'
)
FROM (SELECT array_agg("0" ORDER BY id) as prob FROM ordinal_prd)subq;
-- training function for logit link of grouping case
DROP TABLE IF EXISTS ordinal_out, ordinal_out_summary;
SELECT ordinal(
'multinom_test',
'ordinal_out',
'cat',
'ARRAY[feat1, feat2]',
'0<1<2',
'logit',
'g'
);
\x on
SELECT * FROM ordinal_out;
SELECT * FROM ordinal_out_summary;
\x off
SELECT assert(
relative_error(coef_threshold, ARRAY[2.3497, 3.7680]) < 1e-4 AND
relative_error(coef_feature, ARRAY[0.59758, 0.04845]) < 1e-4 AND
relative_error(log_likelihood, -154.6991) < 1e-4,
'Ordinal regression with IRLS optimizer (test grouping case): Wrong results'
)
FROM ordinal_out
WHERE g='A';
DROP TABLE IF EXISTS ordinal_prd;
SELECT ordinal_predict(
'ordinal_out',
'multinom_to_prd',
'ordinal_prd',
'probability'
);
SELECT assert(
relative_error(prob, ARRAY[0.5141240, 0.3907468])< 1e-4,
'Ordinal regression prediction: Wrong results'
)
FROM (SELECT array_agg("0" ORDER BY id) as prob FROM ordinal_prd where id=1 OR id=2)subq;
-- install-check for ordinal probit
DROP TABLE IF EXISTS ordinal_out, ordinal_out_summary;
SELECT ordinal(
'multinom_test',
'ordinal_out',
'cat',
'ARRAY[feat1, feat2]',
'0<1<2',
'probit'
);
\x on
SELECT * FROM ordinal_out;
SELECT * FROM ordinal_out_summary;
\x off
SELECT assert(
relative_error(coef_threshold, ARRAY[1.5104, 2.2577]) < 1e-4 AND
relative_error(coef_feature, ARRAY[.31431, .03215]) < 1e-4 AND
relative_error(log_likelihood, -191.9912) < 1e-4,
'Ordinal regression with IRLS optimizer (test): Wrong results'
)
FROM ordinal_out;
DROP TABLE IF EXISTS ordinal_prd;
SELECT ordinal_predict(
'ordinal_out',
'multinom_to_prd',
'ordinal_prd',
'probability'
);
SELECT assert(
relative_error(prob, ARRAY[0.5281595, 0.4288294, 0.2461692, 0.5025270])< 1e-4,
'Ordinal regression prediction: Wrong results'
)
FROM (SELECT array_agg("0" ORDER BY id) as prob FROM ordinal_prd)subq;