src/ports/postgres/modules/tsa/arima.sql_in - madlib - Git at Google

 /* ----------------------------------------------------------------------- *//**
  *
  * @file arima.sql_in
  *
  * @brief Arima function for forecasting of timeseries data
  * @date August 2013
  *
  *//* ------------------------------------------------------------------------*/

 m4_include(`SQLCommon.m4')

 /**
 @addtogroup grp_arima

 <div class ="toc"><b>Contents</b>
 <ul>
 <li class="level1"><a href="#train">Training Function</a></li>
 <li class="level1"><a href="#forecast">Forecasting Function</a></li>
 <li class="level1"><a href="#examples">Examples</a></li>
 <li class="level1"><a href="#background">Technical Background</a></li>
 <li class="level1"><a href="#literature">Literature</a></li>
 <li class="level1"><a href="#related">Related Topics</a></li>
 </ul>
 </div>

 @brief Generates a model with autoregressive, moving average, and integrated
 components for a time series dataset.

 Given a time series of data X, the Autoregressive Integrated Moving Average
 (ARIMA) model is a tool for understanding and, perhaps, predicting future
 values in the series. The model consists of three parts, an autoregressive
 (AR) part, a moving average (MA) part, and an integrated (I) part where an
 initial differencing step can be applied to remove any non-stationarity in the
 signal. The model is generally referred to as an ARIMA(p, d, q) model where
 parameters p, d, and q are non-negative integers that refer to the order of
 the autoregressive, integrated, and moving average parts of the model
 respectively.

 @anchor train
 @par Training Function

 The ARIMA training function has the following syntax.
 <pre class="syntax">
 arima_train( input_table,
        output_table,
        timestamp_column,
        timeseries_column,
        grouping_columns,
        include_mean,
        non_seasonal_orders,
        optimizer_params
      )
 </pre>

 @b Arguments
 <DL class="arglist">
     <DT>input_table</DT>
     <DD>TEXT. The name of the table containing time series data.</DD>

     <DT>output_table</DT>
     <DD>TEXT. The name of the table to store the ARIMA model.
         Three tables are created, with names based on the value of the \e output_table
         argument in the training function:

         -# <em>output_table</em>: Table containing the ARIMA model. Contains the following columns:
         <table class="output">
             <tr>
                 <th>mean</DT>
                     <td>Model mean (only if 'include_mean' is TRUE)</td>
                 </tr>
                 <tr>
                     <th>mean_std_error</th>
                     <td>Standard errors for mean</td>
                 </tr>
                 <tr>
                     <th>ar_params</th>
                     <td>Auto-regressions parameters of the ARIMA model</td>
                 </tr>
                 <tr>
                     <th>ar_std_errors</th>
                     <td>Standard errors for AR parameters</td>
                 </tr>
                 <tr>
                     <th>ma_params</th>
                     <td>Moving average parameters of the ARIMA model</td>
                 </tr>
                 <tr>
                     <th>ma_std_errors</th>
                     <td>Standard errors for MA parameters</td>
                 </tr>
             </table>

         -# <em>output_table</em>_summary: Table containing descriptive statistics of the ARIMA model.
         Contains the following columns:
         <table class="output">
             <tr>
                 <th>input_table</th>
                 <td>Table name with the source data</td>
             </tr>
             <tr>
                 <th>timestamp_col</th>
                 <td>Column name in the source table that contains the timestamp index to data</td>
             </tr>
             <tr>
                 <th>timeseries_col</th>
                 <td>Column name in the source table that contains the data values</td>
             </tr>
             <tr>
                 <th>non_seasonal_orders</th>
                 <td>Orders of the non-seasonal ARIMA model</td>
             </tr>
             <tr>
                 <th>include_mean</th>
                 <td>TRUE if intercept was included in ARIMA model</td>
             </tr>
             <tr>
                 <th>residual_variance</th>
                 <td>Variance of the residuals</td>
             </tr>
             <tr>
                 <th>log_likelihood</th>
                 <td>Log likelihood value (when using MLE)</td>
             </tr>
             <tr>
                 <th>iter_num</th>
                 <td>The number of iterations executed</td>
             </tr>
             <tr>
                 <th>exec_time</th>
                 <td>Total time taken to train the model</td>
             </tr>
         </table>

         -# <em>output_table</em>_residual: Table containing the residuals for each
         data point in 'input_table'. Contains the following columns:
         <table class="output">
             <tr>
                 <th>timestamp_col</th>
                 <td>Same as the 'timestamp_col' parameter
                     (all indices from source table included except the first
                     \e d elements, where \e d is the differencing order value
                     from 'non_seasonal_orders')
                 </td>
             </tr>
             <tr>
                 <th>residual</th>
                 <td>Residual value for each data point</td>
             </tr>
         </table>

     </DD>

     <DT>timestamp_column</DT>
     <DD>TEXT. The name of the column containing the timestamp (index) data.
     This could be a serial index (INTEGER) or date/time value (TIMESTAMP).</DD>

     <DT>timeseries_column</DT>
     <DD>TEXT. The name of the column containing the time series data. This data is
     currently restricted to DOUBLE PRECISION.</DD>

     <DT>grouping_columns (not currently implemented)</DT>
     <DD>TEXT, default: NULL.

     A comma-separated list of column names used to group the input dataset
     into discrete groups, training one ARIMA model per group. It is similar to
     the SQL <tt>GROUP BY</tt> clause. When this value is null, no grouping is
     used and a single result model is generated.

     @note Grouping is not currently implemented for ARIMA, but
     will be added in the future.  Any non-NULL value for this parameter
     is ignored.</em></DD>

     <DT>include_mean (optional)</DT>
     <DD>BOOLEAN, default: FALSE. Mean value of the data series is added in the ARIMA model
     if this variable is True. </DD>

     <DT>non_seasonal_orders (optional)</DT>
     <DD>INTEGER[], default: 'ARRAY[1,1,1]'. Orders of the ARIMA model. The orders are [p, d, q],
     where parameters p, d, and q are non-negative integers that refer to
     the order of the autoregressive, integrated, and moving average parts of the model
     respectively. </DD>

     <DT>optimizer_params (optional)</DT>
     <DD>TEXT. Comma-separated list of optimizer-specific parameters of the form ‘name=value'.
     The order of the parameters does not matter. The following parameters are recognized:
         - \b max_iter: Maximum number of iterations to run learning algorithm
             (Default = 100)
         - \b tau: Computes the initial step size for gradient algorithm (Default = 0.001)
         - \b e1: Algorithm-specific threshold for convergence (Default = 1e-15)
         - \b e2: Algorithm-specific threshold for convergence (Default = 1e-15)
         - \b e3: Algorithm-specific threshold for convergence (Default = 1e-15)
         - \b hessian_delta: Delta parameter to compute a numerical approximation
             of the Hessian matrix (Default = 1e-6)
     </DD>
 </DL>

 @anchor forecast
 @par Forecasting Function

 The ARIMA forecast function has the following syntax.
 <pre class="syntax">
 arima_forecast( model_table,
                 output_table,
                 steps_ahead
               )
 </pre>
 @b Arguments
 <DL class="arglist">
 <DT>model_table</DT>
 <DD>TEXT. The name of the table containing the ARIMA model trained on the
 time series dataset.</DD>

 <DT>output_table</DT>
 <DD>TEXT. The name of the table to store the forecasted values.
 The output table produced by the forecast function contains the following columns.
 <table class="output">
     <tr>
         <th>group_by_cols</th>
         <td>Grouping column values (if grouping parameter is provided)</td>
     </tr>
     <tr>
         <th>step_ahead</th>
         <td>Time step for the forecast</td>
     </tr>
     <tr>
         <th>forecast_value</th>
         <td>Forecast of the current time step</td>
     </tr>
 </table>
 </DD>

 <DT>steps_ahead</DT>
 <DD>INTEGER. The number of steps to forecast at the end of the time series.</DD>
 </DL>

 @anchor examples
 @examp
 -# View online help for the ARIMA training function.
 <pre class="example">
 SELECT madlib.arima_train();
 </pre>

 -#  Create an input data set.
 <pre class="example">
 DROP TABLE IF EXISTS arima_beer;
 CREATE TABLE arima_beer (time_id integer NOT NULL, value double precision NOT NULL );
 COPY arima_beer (time_id, value) FROM stdin WITH DELIMITER '|';
 1  | 93.2
 2  | 96.0
 3  | 95.2
 4  | 77.0
 5  | 70.9
 6  | 64.7
 7  | 70.0
 8  | 77.2
 9  | 79.5
 10 | 100.5
 11 | 100.7
 12 | 107.0
 13 | 95.9
 14 | 82.7
 15 | 83.2
 16 | 80.0
 17 | 80.4
 18 | 67.5
 19 | 75.7
 20 | 71.0
 21 | 89.2
 22 | 101.0
 23 | 105.2
 24 | 114.0
 25 | 96.2
 26 | 84.4
 27 | 91.2
 28 | 81.9
 29 | 80.5
 30 | 70.4
 31 | 74.7
 32 | 75.9
 33 | 86.2
 34 | 98.7
 35 | 100.9
 36 | 113.7
 37 | 89.7
 38 | 84.4
 39 | 87.2
 40 | 85.5
 \\.
 </pre>

 -#  Train an ARIMA model.
 <pre class="example">
 -- Train ARIMA model with 'grouping_columns'=NULL, 'include_mean'=TRUE,
 --   and 'non_seasonal_orders'=[1,1,1]
 SELECT madlib.arima_train( 'arima_beer',
                            'arima_beer_output',
                            'time_id',
                            'value',
                            NULL,
                            FALSE,
                            ARRAY[1, 1, 1]
                          );
 </pre>

 -# Examine the ARIMA model.
 <pre class="example">
 \\x ON
 SELECT * FROM arima_beer_output;
 </pre>
 Result:
 <pre class="result">
 -[ RECORD 1 ]-+------------------
 ar_params     | {0.221954769696}
 ar_std_errors | {0.575367782602}
 ma_params     | {-0.140623564576}
 ma_std_errors | {0.533445214346}
 </pre>

 -#  View the summary statistics table.
 <pre class="example">
 SELECT * FROM arima_beer_output_summary;
 </pre>
 Result:
 <pre class="result">
 -[ RECORD 1 ]-------+---------------
 input_table         | arima_beer
 timestamp_col       | time_id
 timeseries_col      | value
 non_seasonal_orders | {1,1,1}
 include_mean        | f
 residual_variance   | 100.989970539
 log_likelihood      | -145.331516396
 iter_num            | 28
 exec_time (s)       | 2.75
 </pre>

 -# View the residuals.
 <pre class="example">
 \\x OFF
 SELECT * FROM arima_beer_output_residual;
 </pre>
 Result:
 <pre class="result">
  time_id |      residual
 ---------+--------------------
        2 |                  0
        4 |   -18.222328834394
        6 |  -5.49616627282665
 ...
       35 |   1.06298837051437
       37 |  -25.0886854003757
       39 |   3.48401666299571
 (40 rows)
 </pre>

 -# Use the ARIMA forecast function to forecast 10 future values.
 <pre class="example">
 SELECT madlib.arima_forecast( 'arima_beer_output',
                               'arima_beer_forecast_output',
                               10
                             );
 SELECT * FROM arima_beer_forecast_output;
 </pre>
 Result:
 <pre class="result">
  steps_ahead | forecast_value
 -------------+----------------
            1 |  85.3802343659
            3 |  85.3477516875
            5 |  85.3461514635
            7 |  85.3460726302
            9 |  85.3460687465
            2 |  85.3536518121
            4 |  85.3464421267
            6 |  85.3460869494
            8 |  85.3460694519
           10 |    85.34606859
 (10 rows)
 </pre>


 @anchor background
 @par Technical Background
 An ARIMA model is an <em>a</em>uto-<em>r</em>egressive <em>i</em>ntegrated
 <em>m</em>oving <em>a</em>verage model.  An ARIMA model is typically expressed
 in the form
 \f[
 (1 - \phi(B)) Y_t  = (1 + \theta(B)) Z_t,
 \f]

 where \f$B\f$ is the backshift operator. The time \f$ t \f$ is from \f$ 1 \f$
 to \f$ N \f$.

 ARIMA models involve the following variables:
 - The values of the time series: \f$ X_t \f$.
 - Parameters of the model: \f$ p \f$, \f$ q \f$, and \f$ d \f$;
       \f$ d \f$ is the differencing order, \f$ p \f$ is the order of the AR
       operator, and \f$ q \f$ is the order of the MA operator.
 - The AR operator: \f$ \phi(B) \f$.
 - The MA operator: \f$ \theta(B) \f$.
 - The lag difference: \f$ Y_{t} \f$, where  \f$ Y_{t} = (1-B)^{d}(X_{t} - \mu) \f$.
 - The mean value: \f$ \mu \f$, which is set to be zero for
       \f$ d>0 \f$ and estimated from the data when d=0.
 - The error terms: \f$ Z_t \f$.

 The auto regression operator models the prediction for the next observation as
 some linear combination of the previous observations. More formally, an AR
 operator of order \f$ p \f$ is defined as

 \f[
 \phi(B) Y_t= \phi_1 Y_{t-1}   + \dots +  \phi_{p} Y_{t-p}
 \f]

 The moving average operator is similar, and it models the prediction
 for the next observation as a linear combination of the errors in the
 previous prediction errors.  More formally, the MA operator of order
 \f$ q \f$ is defined as

 \f[
 \theta(B) Z_t =   \theta_{1} Z_{t-1} + \dots + \theta_{q} Z_{t-q}.
 \f]

 We estimate the parameters using the Levenberg-Marquardt Algorithm.
 In mathematics and computing, the Levenberg-Marquardt algorithm (LMA),
 also known as the damped least-squares (DLS) method, provides a
 numerical solution to the problem of minimizing a function, generally
 nonlinear, over a space of parameters of the function.

 Like other numeric minimization algorithms, LMA is an iterative
 procedure.  To start a minimization, the user has to provide an
 initial guess for the parameter vector, $p$, as well as some tuning
 parameters \f$\tau, \epsilon_1, \epsilon_2, \epsilon_3,\f$.

 @anchor literature
 @par Literature

 [1] Rob J Hyndman and George Athanasopoulos: Forecasting: principles and practice,
 http://otexts.com/fpp/

 [2] Robert H. Shumway, David S. Stoffer: Time Series Analysis and Its
 Applications With R Examples, Third edition Springer Texts in Statistics, 2010

 [3] Henri Gavin: The Levenberg-Marquardt method for nonlinear least squares
 curve-fitting problems, 2011

 @anchor related
 @par Related Topics

 File arima.sql_in documenting the ARIMA functions


 */


 -----------------------------------------------------------------------
 -- ARIMA TRAIN FUNCTIONS
 -----------------------------------------------------------------------

 /*
 @brief Estimate ARIMA parameters from a timeseries data

 */
 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.arima_train(
     input_table     TEXT,       -- Source table name
     output_table    TEXT,       -- Output table prefix to
                                 --    diagnostic and residual table
                                 --    for the ARIMA model
                                 --    Tables created are:
                                 --    output_table
                                 --    <output_table>_summary
                                 --    <output_table>_residual
     timestamp_column   TEXT,       -- Index column (e.g. timestamp) that
                                 -- orders the time series
     timeseries_column  TEXT,       -- Column that contains the values
                                 -- of the time series
     grouping_columns   TEXT,       -- Comma-separated list of grouping
                                 -- columns (Default: NULL)
     include_mean    BOOLEAN,    -- Boolean to control if constant
                                 -- mean is
                                 --    to included in the model
                                 --    (Default: False)
     non_seasonal_orders INTEGER[],  -- Array of non-seasonal AR, I, and
                                     -- MA orders (Default: ARRAY[1,1,1])
     optimizer_params    TEXT        -- Control parameters for optimizer
 )
 RETURNS VOID AS $$
 PythonFunctionBodyOnly(`tsa', `arima')
     with AOControl(False):
         arima.arima_train(schema_madlib, input_table, output_table,
                           timestamp_column, timeseries_column, grouping_columns,
                           include_mean, non_seasonal_orders, optimizer_params)
 $$ LANGUAGE plpythonu VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 -- other optional parameters that can be added in future
 --      estimation_method     TEXT,      -- Parameter estimation
 --                                       -- algorithm (must be one of
 --                                       -- 'mle', 'uls', 'cls')
 --      seasonal_pdq          INTEGER[], -- Array of seasonal AR, I,
 --                                       -- MA orders and
 --                                       -- seasonal cycle length
 --                                       -- (Default: ARRAY[0,0,0,0])

 -------------------------------------------------------------------------

 -- Overloaded functions--

 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.arima_train(
     input_table         TEXT,
     output_table        TEXT,
     timestamp_column       TEXT,
     timeseries_column      TEXT,
     grouping_columns       TEXT,
     include_mean        BOOLEAN,
     non_seasonal_orders INTEGER[]
 )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.arima_train($1, $2, $3, $4, $5, $6,
                                      $7, NULL)
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 ----------------------------------------------------------------------

 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.arima_train(
     input_table      TEXT,
     output_table     TEXT,
     timestamp_column    TEXT,
     timeseries_column   TEXT,
     grouping_columns         TEXT,
     include_mean     BOOLEAN
 )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.arima_train($1, $2, $3, $4, $5, $6,
                                     ARRAY[1,1,1], NULL)
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 -------------------------------------------------------------------------

 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.arima_train(
     input_table      TEXT,
     output_table     TEXT,
     timestamp_column    TEXT,
     timeseries_column   TEXT,
     grouping_columns         TEXT
 )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.arima_train($1, $2, $3, $4, $5, False,
                                     ARRAY[1,1,1], NULL)
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 -------------------------------------------------------------------------

 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.arima_train(
     input_table      TEXT,
     output_table     TEXT,
     timestamp_column    TEXT,
     timeseries_column   TEXT
 )
 RETURNS VOID AS $$
     SELECT MADLIB_SCHEMA.arima_train($1, $2, $3, $4, NULL, False,
                                     ARRAY[1,1,1], NULL)
 $$ LANGUAGE sql VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 -------------------------------------------------------------------------
 /*
 @brief Forecast timeseries using an ARIMA model
 */
 CREATE OR REPLACE FUNCTION
 MADLIB_SCHEMA.arima_forecast(
     model_table    TEXT,   -- Table name containing the ARIMA model
     output_table   TEXT,   -- Output table name for forecasting by
                                 -- the ARIMA model
     steps_ahead         INTEGER -- Number of steps to forecast ahead
 )
 RETURNS VOID AS $$
 PythonFunctionBodyOnly(`tsa', `arima_forecast')
     with AOControl(False):
         arima_forecast.arima_forecast(schema_madlib, model_table, output_table,
                                       steps_ahead)
 $$ LANGUAGE plpythonu VOLATILE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `MODIFIES SQL DATA', `');

 -------------------------------------------------------------------------
 -- Help functions  ------------------------------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.arima_train(
     message            TEXT
 )
 RETURNS TEXT AS $$
 PythonFunctionBodyOnly(`tsa', `arima')
     with AOControl(False):
         return arima.arima_train_help_message(schema_madlib, message)
 $$ LANGUAGE plpythonu IMMUTABLE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');


 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.arima_train()
 RETURNS TEXT AS $$
 PythonFunction(tsa, arima, arima_train_help_message)
 $$ LANGUAGE plpythonu IMMUTABLE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.arima_forecast(
     message            TEXT
 )
 RETURNS TEXT AS $$
 PythonFunctionBodyOnly(`tsa', `arima_forecast')
     with AOControl(False):
         return arima_forecast.arima_forecast_help_message(schema_madlib, message)
 $$ LANGUAGE plpythonu IMMUTABLE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');


 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.arima_forecast()
 RETURNS TEXT AS $$
 PythonFunctionBodyOnly(`tsa', `arima_forecast')
     with AOControl(False):
         return arima_forecast.arima_forecast_help_message(schema_madlib, None)
 $$ LANGUAGE plpythonu IMMUTABLE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `CONTAINS SQL', `');

 ------------------------------------------------------------------------
 ------------------------------------------------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__arima_residual
 (
     distid     INTEGER,
     tvals   DOUBLE PRECISION[],
     p       INTEGER,
     d       INTEGER,
     q       INTEGER,
     phi     DOUBLE PRECISION[],
     theta   DOUBLE PRECISION[],
     mean    DOUBLE PRECISION,
     prez    DOUBLE PRECISION[]
 )
 RETURNS DOUBLE PRECISION[] AS 'MODULE_PATHNAME', 'arima_residual'
 LANGUAGE C
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');


 -------------------------------------------------------------------------
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__arima_diff
 (
     tvals   DOUBLE PRECISION[],
     d       INTEGER
 )
 RETURNS DOUBLE PRECISION[] AS 'MODULE_PATHNAME', 'arima_diff'
 LANGUAGE C STRICT
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');


 -------------------------------------------------------------------------
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__arima_adjust
 (
     distid        INTEGER,
     curr_tvals    DOUBLE PRECISION[],
     prev_tvals    DOUBLE PRECISION[],
     p             INTEGER
 )
 RETURNS DOUBLE PRECISION[] AS 'MODULE_PATHNAME', 'arima_adjust'
 LANGUAGE C
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');


 -------------------------------------------------------------------------

 -- Compute the vector delta, the change of coefficients
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__arima_lm_delta
 (
     jj  DOUBLE PRECISION[],   -- J^T * J
     jz   DOUBLE PRECISION[],   -- J^T * Z
     u   DOUBLE PRECISION      -- Weight of gradient-descent step
 )
 RETURNS DOUBLE PRECISION[] AS 'MODULE_PATHNAME', 'arima_lm_delta'
 LANGUAGE C STRICT
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');


 ----------------------------------------------------------------------
 -- One step of LM algorithm over a chunk
 ----------------------------------------------------------------------

 DROP TYPE IF EXISTS MADLIB_SCHEMA.__arima_lm_result CASCADE;
 CREATE TYPE MADLIB_SCHEMA.__arima_lm_result AS
 (
     z2      DOUBLE PRECISION,
     jj      DOUBLE PRECISION[],
     jz      DOUBLE PRECISION[],
     prez    DOUBLE PRECISION[],
     prej    DOUBLE PRECISION[]
 );

 --------------------------------
 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__arima_lm
 (
     distid  INTEGER,    -- Time series ID
     tvals   DOUBLE PRECISION[],   -- Time series values
     p       INTEGER,    -- P
     q       INTEGER,    -- Q
     phi     DOUBLE PRECISION[],   -- Phi
     theta   DOUBLE PRECISION[],   -- Theta
     mean    DOUBLE PRECISION,     -- Mean
     prez    DOUBLE PRECISION[],   -- Previous Z
     prej    DOUBLE PRECISION[]   -- Previous J
 )
 RETURNS MADLIB_SCHEMA.__arima_lm_result
 AS 'MODULE_PATHNAME', 'arima_lm'
 LANGUAGE C
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');


 ----------------------------------------------------------------------
 -- Accumulate results from all chunks
 ----------------------------------------------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__arima_lm_result_sfunc (
     state_data  DOUBLE PRECISION[],
     jj          DOUBLE PRECISION[],
     jz          DOUBLE PRECISION[],
     z2          DOUBLE PRECISION
 ) RETURNS DOUBLE PRECISION[] AS
     'MODULE_PATHNAME', 'arima_lm_result_sfunc'
 LANGUAGE C IMMUTABLE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');


 ------------------------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__arima_lm_result_pfunc (
     state1      DOUBLE PRECISION[],
     state2      DOUBLE PRECISION[]
 ) RETURNS DOUBLE PRECISION[] AS
     'MODULE_PATHNAME', 'arima_lm_result_pfunc'
 LANGUAGE C IMMUTABLE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');


 ------------------------------------------------

 DROP TYPE IF EXISTS MADLIB_SCHEMA.__arima_lm_sum_result CASCADE;
 CREATE TYPE MADLIB_SCHEMA.__arima_lm_sum_result AS (
     jj          DOUBLE PRECISION[],
     jz          DOUBLE PRECISION[],
     z2          DOUBLE PRECISION,
     u           DOUBLE PRECISION
 );

 ------------------------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__arima_lm_result_ffunc (
     state_data  DOUBLE PRECISION[]
 ) RETURNS MADLIB_SCHEMA.__arima_lm_sum_result AS
     'MODULE_PATHNAME', 'arima_lm_result_ffunc'
 LANGUAGE C IMMUTABLE STRICT
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');


 ------------------------------------------------

 DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.__arima_lm_result_agg (
     /* jj */        DOUBLE PRECISION[],
     /* jz */        DOUBLE PRECISION[],
     /* z2 */        DOUBLE PRECISION
 );

 CREATE AGGREGATE MADLIB_SCHEMA.__arima_lm_result_agg (
     /* jj */        DOUBLE PRECISION[],
     /* jz */        DOUBLE PRECISION[],
     /* z2 */        DOUBLE PRECISION
 ) (
     SType = DOUBLE PRECISION[],
     SFunc = MADLIB_SCHEMA.__arima_lm_result_sfunc,
     m4_ifdef(`__POSTGRESQL__', `', `PreFunc = MADLIB_SCHEMA.__arima_lm_result_pfunc,')
     FinalFunc = MADLIB_SCHEMA.__arima_lm_result_ffunc
     -- use NULL as the initial value
 );

 ----------------------------------------------------------------------

 -- Aggregate function to compute statistics

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__arima_lm_stat_sfunc (
     state_data      DOUBLE PRECISION[],
     distid          INTEGER,
     tvals           DOUBLE PRECISION[],
     p               INTEGER,
     q               INTEGER,
     phi             DOUBLE PRECISION[],
     theta           DOUBLE PRECISION[],
     mean            DOUBLE PRECISION,
     delta           DOUBLE PRECISION
 ) RETURNS DOUBLE PRECISION[] AS
     'MODULE_PATHNAME', 'arima_lm_stat_sfunc'
 LANGUAGE C IMMUTABLE
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');


 -------------------------------------------

 DROP TYPE IF EXISTS MADLIB_SCHEMA.__arima_lm_stat_result CASCADE;
 CREATE TYPE MADLIB_SCHEMA.__arima_lm_stat_result AS (
     std_errs        DOUBLE PRECISION[],
     resid_var       DOUBLE PRECISION,
     loglik          DOUBLE PRECISION
 );

 -------------------------------------------

 CREATE OR REPLACE FUNCTION MADLIB_SCHEMA.__arima_lm_stat_ffunc (
     state_data      DOUBLE PRECISION[]
 ) RETURNS MADLIB_SCHEMA.__arima_lm_stat_result AS
     'MODULE_PATHNAME', 'arima_lm_stat_ffunc'
 LANGUAGE C IMMUTABLE STRICT
 m4_ifdef(`__HAS_FUNCTION_PROPERTIES__', `NO SQL', `');


 ------------------------------------------

 DROP AGGREGATE IF EXISTS MADLIB_SCHEMA.__arima_lm_stat_agg (
     /* distid */    INTEGER,
     /* tvals */     DOUBLE PRECISION[],
     /* p */         INTEGER,
     /* q */         INTEGER,
     /* phi */       DOUBLE PRECISION[],
     /* theta */     DOUBLE PRECISION[],
     /* mean */      DOUBLE PRECISION,
     /* delta */     DOUBLE PRECISION
 );
 CREATE m4_ifdef(`__POSTGRESQL__', `',
     m4_ifdef(`__HAS_ORDERED_AGGREGATES__', `ORDERED')) AGGREGATE
 MADLIB_SCHEMA.__arima_lm_stat_agg (
     /* distid */    INTEGER,
     /* tvals */     DOUBLE PRECISION[],
     /* p */         INTEGER,
     /* q */         INTEGER,
     /* phi */       DOUBLE PRECISION[],
     /* theta */     DOUBLE PRECISION[],
     /* mean */      DOUBLE PRECISION,
     /* delta */     DOUBLE PRECISION
 ) (
     SType = DOUBLE PRECISION[],
     SFunc = MADLIB_SCHEMA.__arima_lm_stat_sfunc,
     FinalFunc = MADLIB_SCHEMA.__arima_lm_stat_ffunc
     -- use NULL as the initial value
 );