blob: 49b86d35e353b8bbd3e6e83dcf4c0c72f9c1c5e1 [file] [log] [blame]
--
-- leastsquares.sql - test for ordinary least squares regression aggregates:
-- * regr_count
-- * regr_avgx
-- * regr_avgy
-- * regr_sxx
-- * regr_sxy
-- * regr_syy
-- * regr_intercept
-- * regr_slope
-- * regr_r2
CREATE TABLE weibull
(
id INTEGER NOT NULL,
x1 DOUBLE PRECISION,
x2 DOUBLE PRECISION,
y DOUBLE PRECISION
) DISTRIBUTED BY (id);
COPY weibull (id, x1, x2, y) FROM stdin;
--
-- Testing of basic single linear regression code
--
-- these queries should produce the same result.
select
regr_count(y, x1)::real as count,
regr_avgx(y, x1)::real as avgx,
regr_avgy(y, x1)::real as avgy,
regr_sxx(y, x1)::real as sxx,
regr_sxy(y, x1)::real as sxy,
regr_syy(y, x1)::real as syy,
regr_intercept(y, x1)::real as intercept,
regr_slope(y, x1)::real as slope,
regr_r2(y, x1)::real as r2
from weibull;
count | avgx | avgy | sxx | sxy | syy | intercept | slope | r2
-------+----------+-------+-----------+---------+----------+-----------+-----------+------------
17 | 55.35294 | 288.4 | 2182.8623 | 5230.02 | 13239.72 | 155.77734 | 2.3959458 | 0.94645846
(1 row)
select
count(y)::real as count,
avg(x1)::real as avgx,
avg(y)::real as avgy,
((count(y) * sum(x1*x1) - sum(x1)^2)/count(y))::real as sxx,
((count(y) * sum(x1*y) - sum(x1)*sum(y))/count(y))::real as sxy,
((count(y) * sum(y*y) - sum(y)^2)/count(y))::real as sxy,
((sum(y)*sum(x1*x1) - sum(x1)*sum(x1*y))/(count(y) * sum(x1*x1) - sum(x1)^2))::real as intercept,
((count(y) * sum(x1*y) - sum(x1)*sum(y))/(count(y) * sum(x1*x1) - sum(x1)^2))::real as slope,
((count(y) * sum(x1*y) - sum(x1)*sum(y))^2/
((count(y) * sum(x1*x1) - sum(x1)^2) * (count(y) * sum(y*y) - sum(y)^2)))::real as r2
from weibull;
count | avgx | avgy | sxx | sxy | sxy | intercept | slope | r2
-------+----------+-------+-----------+---------+----------+-----------+-----------+------------
17 | 55.35294 | 288.4 | 2182.8623 | 5230.02 | 13239.72 | 155.77734 | 2.3959458 | 0.94645846
(1 row)
-- Single linear and multivariate should match for a single independent variable
select
array[regr_intercept(y, x1), regr_slope(y, x1)]::real[] as coef,
regr_r2(y,x1)::real as r2
from weibull;
coef | r2
-----------------------+------------
{155.77734,2.3959458} | 0.94645846
(1 row)
select
array[regr_intercept(y, x2), regr_slope(y, x2)]::real[] as coef,
regr_r2(y,x2)::real as r2
from weibull;
coef | r2
------------------------+------------
{-449.02472,23.864878} | 0.94551253
(1 row)
DROP TABLE weibull;