| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
| # Builtin function that implements ARIMA |
| # |
| # INPUT PARAMETERS: |
| # ---------------------------------------------------------------------------- |
| # NAME TYPE DEFAULT MEANING |
| # ---------------------------------------------------------------------------- |
| # X Double --- The input Matrix to apply Arima on. |
| # max_func_invoc Int 1000 ? |
| # p Int 0 non-seasonal AR order |
| # d Int 0 non-seasonal differencing order |
| # q Int 0 non-seasonal MA order |
| # P Int 0 seasonal AR order |
| # D Int 0 seasonal differencing order |
| # Q Int 0 seasonal MA order |
| # s Int 1 period in terms of number of time-steps |
| # include_mean Boolean FALSE center to mean 0, and include in result |
| # solver String jacobi solver, is either "cg" or "jacobi" |
| # |
| # RETURN VALUES |
| # ---------------------------------------------------------------------------- |
| # NAME TYPE DEFAULT MEANING |
| # ---------------------------------------------------------------------------- |
| # best_point String --- The calculated coefficients |
| # ---------------------------------------------------------------------------- |
| |
| m_arima = function(Matrix[Double] X, Integer max_func_invoc=1000, Integer p=0, |
| Integer d=0, Integer q=0, Integer P=0, Integer D=0, Integer Q=0, Integer s=1, |
| Boolean include_mean=FALSE, String solver="jacobi") |
| return (Matrix[Double] best_point) |
| { |
| totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols |
| #print ("totcols=" + totcols) |
| |
| #TODO: check max_func_invoc < totcols --> print warning (stop here ??) |
| |
| num_rows = nrow(X) |
| #print("nrows of X: " + num_rows) |
| if(num_rows <= d) |
| print("non-seasonal differencing order should be smaller than length of the time-series") |
| |
| mu = 0.0 |
| if(include_mean == 1){ |
| mu = mean(X) |
| X = X - mu |
| } |
| |
| # d-th order differencing: |
| for(i in seq(1,d,1)) |
| X = X[2:nrow(X),] - X[1:nrow(X)-1,] |
| |
| num_rows = nrow(X) |
| if(num_rows <= s*D) |
| print("seasonal differencing order should be smaller than number of observations divided by length of season") |
| |
| for(i in seq(1,D,1)){ |
| n1 = nrow(X)+0.0 |
| X = X[s+1:n1,] - X[1:n1-s,] |
| } |
| |
| n = nrow(X) |
| |
| #Matrix Z with target values of prediction (X) in first column and |
| #all values that can be used to predict a this target value in column 2:totcols of same row |
| Z = cbind(X, matrix(0, n, totcols - 1)) |
| |
| #TODO: This operations can be optimised/simplified |
| |
| #fills Z with values used for non seasonal AR prediction |
| parfor(i1 in seq(1, p, 1), check=0){ |
| Z[i1+1:n,1+i1] = X[1:n-i1,] |
| } |
| |
| #prediciton values for seasonal AR |
| parfor(i2 in seq(1, P, 1), check=0){ |
| Z[s*i2+1:n,1+p+i2] = X[1:n-s*i2,] |
| } |
| |
| #prediciton values for non seasonal MA |
| parfor(i5 in seq(1, q, 1), check=0){ |
| Z[i5+1:n,1+P+p+i5] = X[1:n-i5,] |
| } |
| |
| #prediciton values for seasonal MA |
| parfor(i6 in seq(1, Q, 1), check=0){ |
| Z[s*i6+1:n,1+P+p+q+i6] = X[1:n-s*i6,] |
| } |
| |
| simplex = cbind(matrix(0, totcols-1, 1), diag(matrix(0.1, totcols-1, 1))) |
| |
| num_func_invoc = 0 |
| |
| objvals = matrix(0, 1, ncol(simplex)) |
| parfor(i3 in seq(1,ncol(simplex))){ |
| objvals[1,i3] = arima_css(simplex[,i3], Z, p, P, q, Q, s, solver) |
| } |
| |
| num_func_invoc += ncol(simplex) |
| #print ("num_func_invoc = " + num_func_invoc) |
| tol = 1.5 * 10^(-8) * as.scalar(objvals[1,1]) |
| |
| continue = TRUE |
| while(continue & num_func_invoc <= max_func_invoc){ |
| best_index = as.scalar(rowIndexMin(objvals)) |
| worst_index = as.scalar(rowIndexMax(objvals)) |
| |
| best_obj_val = as.scalar(objvals[1,best_index]) |
| worst_obj_val = as.scalar(objvals[1,worst_index]) |
| |
| continue = (worst_obj_val > best_obj_val + tol) |
| |
| #print("#Function calls::" + num_func_invoc + " OBJ: " + best_obj_val) |
| |
| c = (rowSums(simplex) - simplex[,worst_index])/(nrow(simplex)) |
| |
| x_r = 2*c - simplex[,worst_index] |
| obj_x_r = arima_css(x_r, Z, p, P, q, Q, s, solver) |
| num_func_invoc += 1 |
| |
| if(obj_x_r < best_obj_val){ |
| x_e = 2*x_r - c |
| obj_x_e = arima_css(x_e, Z, p, P, q, Q, s, solver) |
| num_func_invoc = num_func_invoc + 1 |
| |
| simplex[,worst_index] = ifelse (obj_x_r <= obj_x_e, x_r, x_e) |
| objvals[1,worst_index] = ifelse (obj_x_r <= obj_x_e, obj_x_r, obj_x_e) |
| } |
| else { |
| if(obj_x_r < worst_obj_val){ |
| simplex[,worst_index] = x_r |
| objvals[1,worst_index] = obj_x_r |
| } |
| |
| x_c_in = (simplex[,worst_index] + c)/2 |
| obj_x_c_in = arima_css(x_c_in, Z, p, P, q, Q, s, solver) |
| num_func_invoc += 1 |
| |
| if(obj_x_c_in < as.scalar(objvals[1,worst_index])){ |
| simplex[,worst_index] = x_c_in |
| objvals[1,worst_index] = obj_x_c_in |
| } |
| else if(obj_x_r >= worst_obj_val){ |
| best_point = simplex[,best_index] |
| parfor(i4 in 1:ncol(simplex)){ |
| if(i4 != best_index){ |
| simplex[,i4] = (simplex[,i4] + best_point)/2 |
| objvals[1,i4] = arima_css(simplex[,i4], Z, p, P, q, Q, s, solver) |
| } |
| } |
| num_func_invoc += ncol(simplex) - 1 |
| } |
| } |
| } |
| |
| best_point = simplex[,best_index] |
| if(include_mean) |
| best_point = rbind(best_point, as.matrix(mu)) |
| } |
| |
| # changing to additive sar since R's arima seems to do that |
| arima_css = function(Matrix[Double] w, Matrix[Double] X, |
| Integer pIn, Integer P, Integer qIn, Integer Q, Integer s, String solver) |
| return (Double obj) |
| { |
| b = X[,2:ncol(X)]%*%w |
| R = matrix(0, nrow(X), nrow(X)) |
| for(i7 in seq(1, qIn, 1)){ |
| d_ns = matrix(as.scalar(w[P+pIn+i7,1]), nrow(R)-i7, 1) |
| R[1+i7:nrow(R),1:ncol(R)-i7] = R[1+i7:nrow(R),1:ncol(R)-i7] + diag(d_ns) |
| } |
| |
| for(i8 in seq(1, Q, 1)){ |
| err_ind_s = s*i8 |
| d_s = matrix(as.scalar(w[P+pIn+qIn+i8,1]), nrow(R)-err_ind_s, 1) |
| R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] = R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] + diag(d_s) |
| } |
| |
| #TODO: provide default dml "solve()" as well |
| solution = eval(solver + "_solver", R, b, 0.01, 100) |
| |
| errs = X[,1] - solution |
| obj = sum(errs*errs) |
| } |
| |
| cg_solver = function (Matrix[Double] R, Matrix[Double] B, Double tolerance, Integer max_iterations) |
| return (Matrix[Double] y_hat) |
| { |
| y_hat = matrix(0, nrow(R), 1) |
| iter = 0 |
| |
| A = R + diag(matrix(1, rows=nrow(R), cols=1)) |
| Z = t(A)%*%A |
| r = -(t(A)%*%B) |
| p = -r |
| norm_r2 = sum(r^2) |
| |
| continue = (norm_r2 != 0) |
| while(iter < max_iterations & continue){ |
| q = Z%*%p |
| alpha = norm_r2 / as.scalar(t(p) %*% q) |
| y_hat += alpha * p |
| r += alpha * q |
| old_norm_r2 = norm_r2 |
| norm_r2 = sum(r^2) |
| continue = (norm_r2 >= tolerance) |
| beta = norm_r2 / old_norm_r2 |
| p = -r + beta * p |
| iter += 1 |
| } |
| } |
| |
| jacobi_solver = function (Matrix[Double] A, Matrix[Double] B, Double tolerance, Integer max_iterations) |
| return (Matrix[Double] y_hat) |
| { |
| y_hat = matrix(0, nrow(A), 1) |
| iter = 0 |
| diff = tolerance+1 |
| |
| #checking for strict diagonal dominance |
| #required for jacobi's method |
| check = sum(rowSums(abs(A)) >= 1) |
| if(check > 0) |
| print("The matrix is not diagonal dominant. Suggest switching to an exact solver.") |
| |
| while(iter < max_iterations & diff > tolerance){ |
| y_hat_new = B - A%*%y_hat |
| diff = sum((y_hat_new-y_hat)^2) |
| y_hat = y_hat_new |
| iter += 1 |
| } |
| } |