blob: 12930a67f71c66353453a4605c4b076c5a280b9e [file] [log] [blame]
/* ----------------------------------------------------------------------- *//**
*
* @file arima.sql_in
*
* @brief Arima function for forecasting of timeseries data
* @date August 2013
*
*//* ------------------------------------------------------------------------*/
m4_include(`SQLCommon.m4')
/**
@addtogroup grp_arima
<div class ="toc"><b>Contents</b>
<ul>
<li class="level1"><a href="#train">Training Function</a></li>
<li class="level1"><a href="#forecast">Forecasting Function</a></li>
<li class="level1"><a href="#examples">Examples</a></li>
<li class="level1"><a href="#background">Technical Background</a></li>
<li class="level1"><a href="#literature">Literature</a></li>
<li class="level1"><a href="#related">Related Topics</a></li>
</ul>
</div>
@brief Generates a model with autoregressive, moving average, and integrated
components for a time series dataset.
Given a time series of data X, the Autoregressive Integrated Moving Average
(ARIMA) model is a tool for understanding and, perhaps, predicting future
values in the series. The model consists of three parts, an autoregressive
(AR) part, a moving average (MA) part, and an integrated (I) part where an
initial differencing step can be applied to remove any non-stationarity in the
signal. The model is generally referred to as an ARIMA(p, d, q) model where
parameters p, d, and q are non-negative integers that refer to the order of
the autoregressive, integrated, and moving average parts of the model
respectively.
@anchor train
@par Training Function
The ARIMA training function has the following syntax.
<pre class="syntax">
arima_train( input_table,
output_table,
timestamp_column,
timeseries_column,
grouping_columns,
include_mean,
non_seasonal_orders,
optimizer_params
)
</pre>
@b Arguments
<DL class="arglist">
<DT>input_table</DT>
<DD>TEXT. The name of the table containing time series data.</DD>
<DT>output_table</DT>
<DD>TEXT. The name of the table to store the ARIMA model.
Three tables are created, with names based on the value of the \e output_table
argument in the training function:
-# <em>output_table</em>: Table containing the ARIMA model. Contains the following columns:
<table class="output">
<tr>
<th>mean</DT>
<td>Model mean (only if 'include_mean' is TRUE)</td>
</tr>
<tr>
<th>mean_std_error</th>
<td>Standard errors for mean</td>
</tr>
<tr>
<th>ar_params</th>
<td>Auto-regressions parameters of the ARIMA model</td>
</tr>
<tr>
<th>ar_std_errors</th>
<td>Standard errors for AR parameters</td>
</tr>
<tr>
<th>ma_params</th>
<td>Moving average parameters of the ARIMA model</td>
</tr>
<tr>
<th>ma_std_errors</th>
<td>Standard errors for MA parameters</td>
</tr>
</table>
-# <em>output_table</em>_summary: Table containing descriptive statistics of the ARIMA model.
Contains the following columns:
<table class="output">
<tr>
<th>input_table</th>
<td>Table name with the source data</td>
</tr>
<tr>
<th>timestamp_col</th>
<td>Column name in the source table that contains the timestamp index to data</td>
</tr>
<tr>
<th>timeseries_col</th>
<td>Column name in the source table that contains the data values</td>
</tr>
<tr>
<th>non_seasonal_orders</th>
<td>Orders of the non-seasonal ARIMA model</td>
</tr>
<tr>
<th>include_mean</th>
<td>TRUE if intercept was included in ARIMA model</td>
</tr>
<tr>
<th>residual_variance</th>
<td>Variance of the residuals</td>
</tr>
<tr>
<th>log_likelihood</th>
<td>Log likelihood value (when using MLE)</td>
</tr>
<tr>
<th>iter_num</th>
<td>The number of iterations executed</td>
</tr>
<tr>
<th>exec_time</th>
<td>Total time taken to train the model</td>
</tr>
</table>
-# <em>output_table</em>_residual: Table containing the residuals for each
data point in 'input_table'. Contains the following columns:
<table class="output">
<tr>
<th>timestamp_col</th>
<td>Same as the 'timestamp_col' parameter
(all indices from source table included except the first
\e d elements, where \e d is the differencing order value
from 'non_seasonal_orders')
</td>
</tr>
<tr>
<th>residual</th>
<td>Residual value for each data point</td>
</tr>
</table>
</DD>
<DT>timestamp_column</DT>
<DD>TEXT. The name of the column containing the timestamp (index) data.
This could be a serial index (INTEGER) or date/time value (TIMESTAMP).</DD>
<DT>timeseries_column</DT>
<DD>TEXT. The name of the column containing the time series data. This data is
currently restricted to DOUBLE PRECISION.</DD>
<DT>grouping_columns (not currently implemented)</DT>
<DD>TEXT, default: NULL.
A comma-separated list of column names used to group the input dataset
into discrete groups, training one ARIMA model per group. It is similar to
the SQL <tt>GROUP BY</tt> clause. When this value is null, no grouping is
used and a single result model is generated.
@note Grouping is not currently implemented for ARIMA, but
will be added in the future. Any non-NULL value for this parameter
is ignored.</em></DD>
<DT>include_mean (optional)</DT>
<DD>BOOLEAN, default: FALSE. Mean value of the data series is added in the ARIMA model
if this variable is True. </DD>
<DT>non_seasonal_orders (optional)</DT>
<DD>INTEGER[], default: 'ARRAY[1,1,1]'. Orders of the ARIMA model. The orders are [p, d, q],
where parameters p, d, and q are non-negative integers that refer to
the order of the autoregressive, integrated, and moving average parts of the model
respectively. </DD>
<DT>optimizer_params (optional)</DT>
<DD>TEXT. Comma-separated list of optimizer-specific parameters of the form ‘name=value'.
The order of the parameters does not matter. The following parameters are recognized:
- \b max_iter: Maximum number of iterations to run learning algorithm
(Default = 100)
- \b tau: Computes the initial step size for gradient algorithm (Default = 0.001)
- \b e1: Algorithm-specific threshold for convergence (Default = 1e-15)
- \b e2: Algorithm-specific threshold for convergence (Default = 1e-15)
- \b e3: Algorithm-specific threshold for convergence (Default = 1e-15)
- \b hessian_delta: Delta parameter to compute a numerical approximation
of the Hessian matrix (Default = 1e-6)
</DD>
</DL>
@anchor forecast
@par Forecasting Function
The ARIMA forecast function has the following syntax.
<pre class="syntax">
arima_forecast( model_table,
output_table,
steps_ahead
)
</pre>
@b Arguments
<DL class="arglist">
<DT>model_table</DT>
<DD>TEXT. The name of the table containing the ARIMA model trained on the
time series dataset.</DD>
<DT>output_table</DT>
<DD>TEXT. The name of the table to store the forecasted values.
The output table produced by the forecast function contains the following columns.
<table class="output">
<tr>
<th>group_by_cols</th>
<td>Grouping column values (if grouping parameter is provided)</td>
</tr>
<tr>
<th>step_ahead</th>
<td>Time step for the forecast</td>
</tr>
<tr>
<th>forecast_value</th>
<td>Forecast of the current time step</td>
</tr>
</table>
</DD>
<DT>steps_ahead</DT>
<DD>INTEGER. The number of steps to forecast at the end of the time series.</DD>
</DL>
@anchor examples
@examp
-# View online help for the ARIMA training function.
<pre class="example">
SELECT madlib.arima_train();
</pre>
-# Create an input data set.
<pre class="example">
DROP TABLE IF EXISTS arima_beer;
CREATE TABLE arima_beer (time_id integer NOT NULL, value double precision NOT NULL );
COPY arima_beer (time_id, value) FROM stdin WITH DELIMITER '|';
1 | 93.2
2 | 96.0
3 | 95.2
4 | 77.0
5 | 70.9
6 | 64.7
7 | 70.0
8 | 77.2
9 | 79.5
10 | 100.5
11 | 100.7
12 | 107.0
13 | 95.9
14 | 82.7
15 | 83.2
16 | 80.0
17 | 80.4
18 | 67.5
19 | 75.7
20 | 71.0
21 | 89.2
22 | 101.0
23 | 105.2
24 | 114.0
25 | 96.2
26 | 84.4
27 | 91.2
28 | 81.9
29 | 80.5
30 | 70.4
31 | 74.7
32 | 75.9
33 | 86.2
34 | 98.7
35 | 100.9
36 | 113.7
37 | 89.7
38 | 84.4
39 | 87.2
40 | 85.5
\\.
</pre>
-# Train an ARIMA model.
<pre class="example">
-- Train ARIMA model with 'grouping_columns'=NULL, 'include_mean'=TRUE,
-- and 'non_seasonal_orders'=[1,1,1]
SELECT madlib.arima_train( 'arima_beer',
'arima_beer_output',
'time_id',
'value',
NULL,
FALSE,
ARRAY[1, 1, 1]
);
</pre>
-# Examine the ARIMA model.
<pre class="example">
\\x ON
SELECT * FROM arima_beer_output;
</pre>
Result:
<pre class="result">
-[ RECORD 1 ]-+------------------
ar_params | {0.221954769696}
ar_std_errors | {0.575367782602}
ma_params | {-0.140623564576}
ma_std_errors | {0.533445214346}
</pre>
-# View the summary statistics table.
<pre class="example">
SELECT * FROM arima_beer_output_summary;
</pre>
Result:
<pre class="result">
-[ RECORD 1 ]-------+---------------
input_table | arima_beer
timestamp_col | time_id
timeseries_col | value
non_seasonal_orders | {1,1,1}
include_mean | f
residual_variance | 100.989970539
log_likelihood | -145.331516396
iter_num | 28
exec_time (s) | 2.75
</pre>
-# View the residuals.
<pre class="example">
\\x OFF
SELECT * FROM arima_beer_output_residual;
</pre>
Result:
<pre class="result">
time_id | residual
---------+--------------------
2 | 0
4 | -18.222328834394
6 | -5.49616627282665
...
35 | 1.06298837051437
37 | -25.0886854003757
39 | 3.48401666299571
(40 rows)
</pre>
-# Use the ARIMA forecast function to forecast 10 future values.
<pre class="example">
SELECT madlib.arima_forecast( 'arima_beer_output',
'arima_beer_forecast_output',
10
);
SELECT * FROM arima_beer_forecast_output;
</pre>
Result:
<pre class="result">
steps_ahead | forecast_value
-------------+----------------
1 | 85.3802343659
3 | 85.3477516875
5 | 85.3461514635
7 | 85.3460726302
9 | 85.3460687465
2 | 85.3536518121
4 | 85.3464421267
6 | 85.3460869494
8 | 85.3460694519
10 | 85.34606859
(10 rows)
</pre>
@anchor background
@par Technical Background
An ARIMA model is an <em>a</em>uto-<em>r</em>egressive <em>i</em>ntegrated
<em>m</em>oving <em>a</em>verage model. An ARIMA model is typically expressed
in the form
\f[
(1 - \phi(B)) Y_t = (1 + \theta(B)) Z_t,
\f]
where \f$B\f$ is the backshift operator. The time \f$ t \f$ is from \f$ 1 \f$
to \f$ N \f$.
ARIMA models involve the following variables:
- The values of the time series: \f$ X_t \f$.
- Parameters of the model: \f$ p \f$, \f$ q \f$, and \f$ d \f$;
\f$ d \f$ is the differencing order, \f$ p \f$ is the order of the AR
operator, and \f$ q \f$ is the order of the MA operator.
- The AR operator: \f$ \phi(B) \f$.
- The MA operator: \f$ \theta(B) \f$.
- The lag difference: \f$ Y_{t} \f$, where \f$ Y_{t} = (1-B)^{d}(X_{t} - \mu) \f$.
- The mean value: \f$ \mu \f$, which is set to be zero for
\f$ d>0 \f$ and estimated from the data when d=0.
- The error terms: \f$ Z_t \f$.
The auto regression operator models the prediction for the next observation as
some linear combination of the previous observations. More formally, an AR
operator of order \f$ p \f$ is defined as
\f[
\phi(B) Y_t= \phi_1 Y_{t-1} + \dots + \phi_{p} Y_{t-p}
\f]
The moving average operator is similar, and it models the prediction
for the next observation as a linear combination of the errors in the
previous prediction errors. More formally, the MA operator of order
\f$ q \f$ is defined as
\f[
\theta(B) Z_t = \theta_{1} Z_{t-1} + \dots + \theta_{q} Z_{t-q}.
\f]
We estimate the parameters using the Levenberg-Marquardt Algorithm.
In mathematics and computing, the Levenberg-Marquardt algorithm (LMA),
also known as the damped least-squares (DLS) method, provides a
numerical solution to the problem of minimizing a function, generally
nonlinear, over a space of parameters of the function.
Like other numeric minimization algorithms, LMA is an iterative
procedure. To start a minimization, the user has to provide an
initial guess for the parameter vector, $p$, as well as some tuning
parameters \f$\tau, \epsilon_1, \epsilon_2, \epsilon_3,\f$.
@anchor literature
@par Literature
[1] Rob J Hyndman and George Athanasopoulos: Forecasting: principles and practice,
http://otexts.com/fpp/
[2] Robert H. Shumway, David S. Stoffer: Time Series Analysis and Its
Applications With R Examples, Third edition Springer Texts in Statistics, 2010
[3] Henri Gavin: The Levenberg-Marquardt method for nonlinear least squares
curve-fitting problems, 2011
@anchor related
@par Related Topics
File arima.sql_in documenting the ARIMA functions
*/
-----------------------------------------------------------------------
-- ARIMA TRAIN FUNCTIONS
-----------------------------------------------------------------------
/*
@brief Estimate ARIMA parameters from a timeseries data
*/
CREATE OR REPLACE FUNCTION
MADLIB_SCHEMA.arima_train(
input_table TEXT, -- Source table name
output_table TEXT, -- Output table prefix to
-- diagnostic and residual table
-- for the ARIMA model
-- Tables created are:
-- output_table
-- <output_table>_summary
-- <output_table>_residual
timestamp_column TEXT, -- Index column (e.g. timestamp) that
-- orders the time series
timeseries_column TEXT, -- Column that contains the values
-- of the time series
grouping_columns TEXT, -- Comma-separated list of grouping
-- columns (Default: NULL)
include_mean BOOLEAN, -- Boolean to control if constant
-- mean is
-- to included in the model
-- (Default: False)
non_seasonal_orders INTEGER[], -- Array of non-seasonal AR, I, and
-- MA orders (Default: ARRAY[1,1,1])
optimizer_params TEXT -- Control parameters for optimizer
)
RETURNS VOID AS $$
PythonFunctionBodyOnly(`tsa', `arima')
with AOControl(False):
arima.arima_train(schema_madlib, input_table, output_table,
timestamp_column, timeseries_column, grouping_columns,
include_mean, non_seasonal_orders, optimizer_params)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-- other optional parameters that can be added in future
-- estimation_method TEXT, -- Parameter estimation
-- -- algorithm (must be one of
-- -- 'mle', 'uls', 'cls')
-- seasonal_pdq INTEGER[], -- Array of seasonal AR, I,
-- -- MA orders and
-- -- seasonal cycle length
-- -- (Default: ARRAY[0,0,0,0])
-------------------------------------------------------------------------
-- Overloaded functions--
CREATE OR REPLACE FUNCTION
MADLIB_SCHEMA.arima_train(
input_table TEXT,
output_table TEXT,
timestamp_column TEXT,
timeseries_column TEXT,
grouping_columns TEXT,
include_mean BOOLEAN,
non_seasonal_orders INTEGER[]
)
RETURNS VOID AS $$
SELECT MADLIB_SCHEMA.arima_train($1, $2, $3, $4, $5, $6,
$7, NULL)
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
----------------------------------------------------------------------
CREATE OR REPLACE FUNCTION
MADLIB_SCHEMA.arima_train(
input_table TEXT,
output_table TEXT,
timestamp_column TEXT,
timeseries_column TEXT,
grouping_columns TEXT,
include_mean BOOLEAN
)
RETURNS VOID AS $$
SELECT MADLIB_SCHEMA.arima_train($1, $2, $3, $4, $5, $6,
ARRAY[1,1,1], NULL)
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION
MADLIB_SCHEMA.arima_train(
input_table TEXT,
output_table TEXT,
timestamp_column TEXT,
timeseries_column TEXT,
grouping_columns TEXT
)
RETURNS VOID AS $$
SELECT MADLIB_SCHEMA.arima_train($1, $2, $3, $4, $5, False,
ARRAY[1,1,1], NULL)
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION
MADLIB_SCHEMA.arima_train(
input_table TEXT,
output_table TEXT,
timestamp_column TEXT,
timeseries_column TEXT
)
RETURNS VOID AS $$
SELECT MADLIB_SCHEMA.arima_train($1, $2, $3, $4, NULL, False,
ARRAY[1,1,1], NULL)
$$ LANGUAGE sql VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-------------------------------------------------------------------------
/*
@brief Forecast timeseries using an ARIMA model
*/
CREATE OR REPLACE FUNCTION
MADLIB_SCHEMA.arima_forecast(
model_table TEXT, -- Table name containing the ARIMA model
output_table TEXT, -- Output table name for forecasting by
-- the ARIMA model
steps_ahead INTEGER -- Number of steps to forecast ahead
)
RETURNS VOID AS $$
PythonFunctionBodyOnly(`tsa', `arima_forecast')
with AOControl(False):
arima_forecast.arima_forecast(schema_madlib, model_table, output_table,
steps_ahead)
$$ LANGUAGE plpythonu VOLATILE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');
-------------------------------------------------------------------------
-- Help functions ------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.arima_train(
message TEXT
)
RETURNS TEXT AS $$
PythonFunctionBodyOnly(`tsa', `arima')
with AOControl(False):
return arima.arima_train_help_message(schema_madlib, message)
$$ LANGUAGE plpythonu IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.arima_train()
RETURNS TEXT AS $$
PythonFunction(tsa, arima, arima_train_help_message)
$$ LANGUAGE plpythonu IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.arima_forecast(
message TEXT
)
RETURNS TEXT AS $$
PythonFunctionBodyOnly(`tsa', `arima_forecast')
with AOControl(False):
return arima_forecast.arima_forecast_help_message(schema_madlib, message)
$$ LANGUAGE plpythonu IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.arima_forecast()
RETURNS TEXT AS $$
PythonFunctionBodyOnly(`tsa', `arima_forecast')
with AOControl(False):
return arima_forecast.arima_forecast_help_message(schema_madlib, None)
$$ LANGUAGE plpythonu IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');
------------------------------------------------------------------------
------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__arima_residual
(
distid INTEGER,
tvals DOUBLE PRECISION[],
p INTEGER,
d INTEGER,
q INTEGER,
phi DOUBLE PRECISION[],
theta DOUBLE PRECISION[],
mean DOUBLE PRECISION,
prez DOUBLE PRECISION[]
)
RETURNS DOUBLE PRECISION[] AS 'MODULE_PATHNAME', 'arima_residual'
LANGUAGE C
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
-------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__arima_diff
(
tvals DOUBLE PRECISION[],
d INTEGER
)
RETURNS DOUBLE PRECISION[] AS 'MODULE_PATHNAME', 'arima_diff'
LANGUAGE C STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
-------------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__arima_adjust
(
distid INTEGER,
curr_tvals DOUBLE PRECISION[],
prev_tvals DOUBLE PRECISION[],
p INTEGER
)
RETURNS DOUBLE PRECISION[] AS 'MODULE_PATHNAME', 'arima_adjust'
LANGUAGE C
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
-------------------------------------------------------------------------
-- Compute the vector delta, the change of coefficients
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__arima_lm_delta
(
jj DOUBLE PRECISION[], -- J^T * J
jz DOUBLE PRECISION[], -- J^T * Z
u DOUBLE PRECISION -- Weight of gradient-descent step
)
RETURNS DOUBLE PRECISION[] AS 'MODULE_PATHNAME', 'arima_lm_delta'
LANGUAGE C STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
----------------------------------------------------------------------
-- One step of LM algorithm over a chunk
----------------------------------------------------------------------
DROP TYPE IF EXISTS MADLIB_SCHEMA.__arima_lm_result CASCADE;
CREATE TYPE MADLIB_SCHEMA.__arima_lm_result AS
(
z2 DOUBLE PRECISION,
jj DOUBLE PRECISION[],
jz DOUBLE PRECISION[],
prez DOUBLE PRECISION[],
prej DOUBLE PRECISION[]
);
--------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__arima_lm
(
distid INTEGER, -- Time series ID
tvals DOUBLE PRECISION[], -- Time series values
p INTEGER, -- P
q INTEGER, -- Q
phi DOUBLE PRECISION[], -- Phi
theta DOUBLE PRECISION[], -- Theta
mean DOUBLE PRECISION, -- Mean
prez DOUBLE PRECISION[], -- Previous Z
prej DOUBLE PRECISION[] -- Previous J
)
RETURNS MADLIB_SCHEMA.__arima_lm_result
AS 'MODULE_PATHNAME', 'arima_lm'
LANGUAGE C
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
----------------------------------------------------------------------
-- Accumulate results from all chunks
----------------------------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__arima_lm_result_sfunc (
state_data DOUBLE PRECISION[],
jj DOUBLE PRECISION[],
jz DOUBLE PRECISION[],
z2 DOUBLE PRECISION
) RETURNS DOUBLE PRECISION[] AS
'MODULE_PATHNAME', 'arima_lm_result_sfunc'
LANGUAGE C IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__arima_lm_result_pfunc (
state1 DOUBLE PRECISION[],
state2 DOUBLE PRECISION[]
) RETURNS DOUBLE PRECISION[] AS
'MODULE_PATHNAME', 'arima_lm_result_pfunc'
LANGUAGE C IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
------------------------------------------------
DROP TYPE IF EXISTS MADLIB_SCHEMA.__arima_lm_sum_result CASCADE;
CREATE TYPE MADLIB_SCHEMA.__arima_lm_sum_result AS (
jj DOUBLE PRECISION[],
jz DOUBLE PRECISION[],
z2 DOUBLE PRECISION,
u DOUBLE PRECISION
);
------------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__arima_lm_result_ffunc (
state_data DOUBLE PRECISION[]
) RETURNS MADLIB_SCHEMA.__arima_lm_sum_result AS
'MODULE_PATHNAME', 'arima_lm_result_ffunc'
LANGUAGE C IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
------------------------------------------------
DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.__arima_lm_result_agg (
/* jj */ DOUBLE PRECISION[],
/* jz */ DOUBLE PRECISION[],
/* z2 */ DOUBLE PRECISION
);
CREATE AGGREGATE MADLIB_SCHEMA.__arima_lm_result_agg (
/* jj */ DOUBLE PRECISION[],
/* jz */ DOUBLE PRECISION[],
/* z2 */ DOUBLE PRECISION
) (
SType = DOUBLE PRECISION[],
SFunc = MADLIB_SCHEMA.__arima_lm_result_sfunc,
m4_ifdef(`__POSTGRESQL__', `', `PreFunc = MADLIB_SCHEMA.__arima_lm_result_pfunc,')
FinalFunc = MADLIB_SCHEMA.__arima_lm_result_ffunc
-- use NULL as the initial value
);
----------------------------------------------------------------------
-- Aggregate function to compute statistics
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__arima_lm_stat_sfunc (
state_data DOUBLE PRECISION[],
distid INTEGER,
tvals DOUBLE PRECISION[],
p INTEGER,
q INTEGER,
phi DOUBLE PRECISION[],
theta DOUBLE PRECISION[],
mean DOUBLE PRECISION,
delta DOUBLE PRECISION
) RETURNS DOUBLE PRECISION[] AS
'MODULE_PATHNAME', 'arima_lm_stat_sfunc'
LANGUAGE C IMMUTABLE
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
-------------------------------------------
DROP TYPE IF EXISTS MADLIB_SCHEMA.__arima_lm_stat_result CASCADE;
CREATE TYPE MADLIB_SCHEMA.__arima_lm_stat_result AS (
std_errs DOUBLE PRECISION[],
resid_var DOUBLE PRECISION,
loglik DOUBLE PRECISION
);
-------------------------------------------
CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__arima_lm_stat_ffunc (
state_data DOUBLE PRECISION[]
) RETURNS MADLIB_SCHEMA.__arima_lm_stat_result AS
'MODULE_PATHNAME', 'arima_lm_stat_ffunc'
LANGUAGE C IMMUTABLE STRICT
m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');
------------------------------------------
DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.__arima_lm_stat_agg (
/* distid */ INTEGER,
/* tvals */ DOUBLE PRECISION[],
/* p */ INTEGER,
/* q */ INTEGER,
/* phi */ DOUBLE PRECISION[],
/* theta */ DOUBLE PRECISION[],
/* mean */ DOUBLE PRECISION,
/* delta */ DOUBLE PRECISION
);
CREATE m4_ifdef(`__POSTGRESQL__', `',
m4_ifdef(`__HAS_ORDERED_AGGREGATES__', `ORDERED')) AGGREGATE
MADLIB_SCHEMA.__arima_lm_stat_agg (
/* distid */ INTEGER,
/* tvals */ DOUBLE PRECISION[],
/* p */ INTEGER,
/* q */ INTEGER,
/* phi */ DOUBLE PRECISION[],
/* theta */ DOUBLE PRECISION[],
/* mean */ DOUBLE PRECISION,
/* delta */ DOUBLE PRECISION
) (
SType = DOUBLE PRECISION[],
SFunc = MADLIB_SCHEMA.__arima_lm_stat_sfunc,
FinalFunc = MADLIB_SCHEMA.__arima_lm_stat_ffunc
-- use NULL as the initial value
);