blob: 96ee2e24f9f6ab72cbd00c4eb0ef98162b2d3169 [file] [log] [blame]
CREATE TABLE rand_numeric (a int2, b int4, c int8, d float4, e float8, f real);
INSERT INTO rand_numeric
SELECT
random() * 1000,
random() * 1000,
random() * 1000,
random() * 1000,
random() * 1000,
random() * 1000
FROM
generate_series(1, 100);
SELECT * FROM correlation();
DROP TABLE IF EXISTS corr_output, corr_output_summary;
SELECT * FROM correlation('rand_numeric', 'corr_output');
SELECT * FROM corr_output ORDER BY column_position;
DROP TABLE IF EXISTS corr_output, corr_output_summary;
SELECT * FROM correlation('rand_numeric', 'corr_output', ' ');
DROP TABLE IF EXISTS corr_output, corr_output_summary;
SELECT * FROM correlation('rand_numeric', 'corr_output', '');
DROP TABLE IF EXISTS corr_output, corr_output_summary;
SELECT * FROM correlation('rand_numeric', 'corr_output', Null);
DROP TABLE IF EXISTS corr_output, corr_output_summary;
SELECT * FROM correlation('rand_numeric', 'corr_output', 'a, c, e');
DROP TABLE IF EXISTS example_data;
CREATE TABLE example_data(
id SERIAL, outlook TEXT,
temperature FLOAT8, humidity FLOAT8,
windy TEXT, class TEXT, new_col FLOAT8);
INSERT INTO example_data VALUES
(1, 'sunny', 85, 85, 'false', 'Dont Play', 1),
(2, 'sunny', 80, 90, 'true', 'Dont Play', 12),
(3, 'overcast', 83, 78, 'false', 'Play', 13),
(4, 'rain', 70, 96, 'false', 'Play', 16),
(5, 'rain', 68, 80, 'false', 'Play', 17),
(6, 'rain', 65, 70, 'true', 'Dont Play', 12),
(7, 'overcast', 64, 65, 'true', 'Play', 15),
(8, 'sunny', 72, 95, 'false', 'Dont Play', 19),
(9, 'sunny', 69, 70, 'false', 'Play', 20),
(10, 'rain', 75, 80, 'false', 'Play', 32),
(11, 'sunny', 75, 70, 'true', 'Play', 31),
(12, 'overcast', 72, 90, 'true', 'Play', 11),
(13, 'overcast', 81, 75, 'false', 'Play', 31),
(14, 'rain', 71, 80, 'true', 'Dont Play', 21),
(15, 'rain', NULL, 100, 'true', NULL, 11),
(16, 'rain', 110, NULL, 'true', NULL, 13);
DROP TABLE IF EXISTS example_data_output, example_data_output_summary, example_data_output_covariance, example_data_output_covariance_summary;
SELECT correlation( 'example_data',
'example_data_output',
'temperature, humidity, id, new_col',
True);
SELECT covariance( 'example_data',
'example_data_output_covariance',
'temperature, humidity, id, new_col',
True);
SELECT assert(relative_error(temperature, 0.1606) < 0.0001, 'calculated correlation value: '|| temperature ||' does not match expected: ' || 0.1606) FROM example_data_output WHERE variable ='humidity';
SELECT assert(relative_error(humidity, -0.3502) < 0.0001, 'calculated correlation value ' || humidity || 'does not match expected: ' || -0.3502) FROM example_data_output WHERE variable ='new_col';
SELECT assert(relative_error(id, 0.4122) < 0.0001, 'calculated correlation value ' || id || 'does not match expected: ' || 0.4122) FROM example_data_output WHERE variable ='new_col';
SELECT assert(relative_error(temperature, 17.1500) < 0.0001, 'calculated covariance value: '|| temperature ||' does not match expected: ' || 17.1500) FROM example_data_output_covariance WHERE variable ='humidity';
SELECT assert(relative_error(humidity, -28.7000) < 0.0001, 'calculated covariance value ' || humidity || 'does not match expected: ' || -28.7000) FROM example_data_output_covariance WHERE variable ='new_col';
SELECT assert(relative_error(id, 15.4688) < 0.0001, 'calculated covariance value ' || id || 'does not match expected: ' || 15.4688) FROM example_data_output_covariance WHERE variable ='new_col';
DROP TABLE IF EXISTS example_data_gr2;
CREATE TABLE example_data_gr2(
id SERIAL, outlook TEXT,
temperature FLOAT8, humidity FLOAT8,
windy TEXT, class TEXT, new_col FLOAT8, gr TEXT, gr2 TEXT);
INSERT INTO example_data_gr2 VALUES
(1, 'sunny', 85, 85, 'false', 'Dont Play', 1, 1, 1),
(2, 'sunny', 80, 90, 'true', 'Dont Play', 12, 1, 1),
(3, 'overcast', 83, 78, 'false', 'Play', 13, 1, 1),
(4, 'rain', 70, 96, 'false', 'Play', 16, 1, 1),
(5, 'rain', 68, 80, 'false', 'Play', 17, 1, 1),
(6, 'rain', 65, 70, 'true', 'Dont Play', 12, 1, 1),
(7, 'overcast', 64, 65, 'true', 'Play', 15, 1, 1),
(8, 'sunny', 72, 95, 'false', 'Dont Play', 19, 1, 1),
(9, 'sunny', 69, 70, 'false', 'Play', 20, 1, 1),
(10, 'rain', 75, 80, 'false', 'Play', 32, 1, 1),
(11, 'sunny', 75, 70, 'true', 'Play', 31, 1, 1),
(12, 'overcast', 72, 90, 'true', 'Play', 11, 1, 1),
(13, 'overcast', 81, 75, 'false', 'Play', 31, 1, 1),
(14, 'rain', 71, 80, 'true', 'Dont Play', 21, 1, 1),
(15, 'rain', NULL, 100, 'true', NULL, 11, 1, 1),
(16, 'rain', 110, NULL, 'true', NULL, 13, 1, 1),
(1, 'sunny', 78, 81, 'false', 'Dont Play', 1, 2, 2),
(2, 'sunny', 80, 90, 'true', 'Dont Play', 12, 2, 2),
(3, 'overcast', 82, 78, 'false', 'Play', 13, 2, 2),
(4, 'rain', 70, 96, 'false', 'Play', 16, 2, 2),
(5, 'rain', 68, 80, 'false', 'Play', 17, 2, 2),
(6, 'rain', 69, 70, 'true', 'Dont Play', 12, 2, 2),
(7, 'overcast', 66, 65, 'true', 'Play', 15, 2, 2),
(8, 'sunny', 72, 101, 'false', 'Dont Play', 19, 2, 2),
(9, 'sunny', 71, 68, 'false', 'Play', 20, 2, 2),
(10, 'rain', 75, 80, 'false', 'Play', 32, 2, 3),
(11, 'sunny', 75, 70, 'true', 'Play', 31, 2, 3),
(12, 'overcast', 73, 93, 'true', 'Play', 11, 2, 3),
(13, 'overcast', 83, 77, 'false', 'Play', 31, 2, 3),
(14, 'rain', 69, 81, 'true', 'Dont Play', 21, 2, 3),
(15, 'rain', NULL, 100, 'true', NULL, 11, 2, 3),
(16, 'rain', 100, NULL, 'true', NULL, 13, 2, 3);
DROP TABLE IF EXISTS example_data_gr2_output, example_data_gr2_output_summary;
SELECT correlation( 'example_data_gr2',
'example_data_gr2_output',
'temperature, humidity, id, new_col',
FALSE,
'gr,gr2');
SELECT assert(relative_error(temperature, 0.1606) < 0.0001, 'calculated correlation value: '|| temperature ||' does not match expected: ' || 0.1606) FROM example_data_gr2_output WHERE variable ='humidity' AND gr='1' AND gr2='1';
SELECT assert(relative_error(humidity, -0.3502) < 0.0001, 'calculated correlation value ' || humidity || 'does not match expected: ' || -0.3502) FROM example_data_gr2_output WHERE variable ='new_col' AND gr='1' AND gr2='1';
SELECT assert(relative_error(id, 0.4122) < 0.0001, 'calculated correlation value ' || id || 'does not match expected: ' || 0.4122) FROM example_data_gr2_output WHERE variable ='new_col' AND gr='1' AND gr2='1';
DROP TABLE IF EXISTS example_data_gr2_output, example_data_gr2_output_summary;
SELECT correlation( 'example_data_gr2',
'example_data_gr2_output',
'temperature, humidity, id, new_col',
FALSE,
'gr,gr2', 2);
SELECT assert(relative_error(temperature, 0.1606) < 0.0001, 'calculated correlation value: '|| temperature ||' does not match expected: ' || 0.1606) FROM example_data_gr2_output WHERE variable ='humidity' AND gr='1' AND gr2='1';
SELECT assert(relative_error(humidity, -0.3502) < 0.0001, 'calculated correlation value ' || humidity || 'does not match expected: ' || -0.3502) FROM example_data_gr2_output WHERE variable ='new_col' AND gr='1' AND gr2='1';
SELECT assert(relative_error(id, 0.4122) < 0.0001, 'calculated correlation value ' || id || 'does not match expected: ' || 0.4122) FROM example_data_gr2_output WHERE variable ='new_col' AND gr='1' AND gr2='1';
DROP TABLE IF EXISTS example_data_gr2_output_covariance, example_data_gr2_output_covariance_summary;
SELECT covariance( 'example_data_gr2',
'example_data_gr2_output_covariance',
'temperature, humidity, id, new_col',
FALSE,
'gr,gr2');
SELECT assert(relative_error(temperature, 17.1500) < 0.0001, 'calculated covariance value: '|| temperature ||' does not match expected: ' || 17.1500) FROM example_data_gr2_output_covariance WHERE variable ='humidity' AND gr='1' AND gr2='1';
SELECT assert(relative_error(humidity, -28.7000) < 0.0001, 'calculated covariance value ' || humidity || 'does not match expected: ' || -28.7000) FROM example_data_gr2_output_covariance WHERE variable ='new_col' AND gr='1' AND gr2='1';
SELECT assert(relative_error(id, 15.4688) < 0.0001, 'calculated covariance value ' || id || 'does not match expected: ' || 15.4688) FROM example_data_gr2_output_covariance WHERE variable ='new_col' AND gr='1' AND gr2='1';
DROP TABLE IF EXISTS example_data_gr2_output_covariance, example_data_gr2_output_covariance_summary;
SELECT covariance( 'example_data_gr2',
'example_data_gr2_output_covariance',
'temperature, humidity, id, new_col',
FALSE,
'gr,gr2', 50);
SELECT assert(count(*) = 12, 'the output should have 3 groups X 4 variables = 12 rows') FROM example_data_gr2_output_covariance;
SELECT assert(relative_error(temperature, 17.1500) < 0.0001, 'calculated covariance value: '|| temperature ||' does not match expected: ' || 17.1500) FROM example_data_gr2_output_covariance WHERE variable ='humidity' AND gr='1' AND gr2='1';
SELECT assert(relative_error(humidity, -28.7000) < 0.0001, 'calculated covariance value ' || humidity || 'does not match expected: ' || -28.7000) FROM example_data_gr2_output_covariance WHERE variable ='new_col' AND gr='1' AND gr2='1';
SELECT assert(relative_error(id, 15.4688) < 0.0001, 'calculated covariance value ' || id || 'does not match expected: ' || 15.4688) FROM example_data_gr2_output_covariance WHERE variable ='new_col' AND gr='1' AND gr2='1';