blob: de0ebeccc89e0f07fab84e80adbceb29854d3726 [file] [log] [blame]
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# Builtin function that implements ARIMA
#
# INPUT PARAMETERS:
# ----------------------------------------------------------------------------
# NAME TYPE DEFAULT MEANING
# ----------------------------------------------------------------------------
# X Double --- The input Matrix to apply Arima on.
# max_func_invoc Int 1000 ?
# p Int 0 non-seasonal AR order
# d Int 0 non-seasonal differencing order
# q Int 0 non-seasonal MA order
# P Int 0 seasonal AR order
# D Int 0 seasonal differencing order
# Q Int 0 seasonal MA order
# s Int 1 period in terms of number of time-steps
# include_mean Boolean FALSE center to mean 0, and include in result
# solver String jacobi solver, is either "cg" or "jacobi"
#
# RETURN VALUES
# ----------------------------------------------------------------------------
# NAME TYPE DEFAULT MEANING
# ----------------------------------------------------------------------------
# best_point String --- The calculated coefficients
# ----------------------------------------------------------------------------
m_arima = function(Matrix[Double] X, Integer max_func_invoc=1000, Integer p=0,
Integer d=0, Integer q=0, Integer P=0, Integer D=0, Integer Q=0, Integer s=1,
Boolean include_mean=FALSE, String solver="jacobi")
return (Matrix[Double] best_point)
{
totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols
#print ("totcols=" + totcols)
#TODO: check max_func_invoc < totcols --> print warning (stop here ??)
num_rows = nrow(X)
#print("nrows of X: " + num_rows)
if(num_rows <= d)
print("non-seasonal differencing order should be smaller than length of the time-series")
mu = 0.0
if(include_mean == 1){
mu = mean(X)
X = X - mu
}
# d-th order differencing:
for(i in seq(1,d,1))
X = X[2:nrow(X),] - X[1:nrow(X)-1,]
num_rows = nrow(X)
if(num_rows <= s*D)
print("seasonal differencing order should be smaller than number of observations divided by length of season")
for(i in seq(1,D,1)){
n1 = nrow(X)+0.0
X = X[s+1:n1,] - X[1:n1-s,]
}
n = nrow(X)
#Matrix Z with target values of prediction (X) in first column and
#all values that can be used to predict a this target value in column 2:totcols of same row
Z = cbind(X, matrix(0, n, totcols - 1))
#TODO: This operations can be optimised/simplified
#fills Z with values used for non seasonal AR prediction
parfor(i1 in seq(1, p, 1), check=0){
Z[i1+1:n,1+i1] = X[1:n-i1,]
}
#prediciton values for seasonal AR
parfor(i2 in seq(1, P, 1), check=0){
Z[s*i2+1:n,1+p+i2] = X[1:n-s*i2,]
}
#prediciton values for non seasonal MA
parfor(i5 in seq(1, q, 1), check=0){
Z[i5+1:n,1+P+p+i5] = X[1:n-i5,]
}
#prediciton values for seasonal MA
parfor(i6 in seq(1, Q, 1), check=0){
Z[s*i6+1:n,1+P+p+q+i6] = X[1:n-s*i6,]
}
simplex = cbind(matrix(0, totcols-1, 1), diag(matrix(0.1, totcols-1, 1)))
num_func_invoc = 0
objvals = matrix(0, 1, ncol(simplex))
parfor(i3 in seq(1,ncol(simplex))){
objvals[1,i3] = arima_css(simplex[,i3], Z, p, P, q, Q, s, solver)
}
num_func_invoc += ncol(simplex)
#print ("num_func_invoc = " + num_func_invoc)
tol = 1.5 * 10^(-8) * as.scalar(objvals[1,1])
continue = TRUE
while(continue & num_func_invoc <= max_func_invoc){
best_index = as.scalar(rowIndexMin(objvals))
worst_index = as.scalar(rowIndexMax(objvals))
best_obj_val = as.scalar(objvals[1,best_index])
worst_obj_val = as.scalar(objvals[1,worst_index])
continue = (worst_obj_val > best_obj_val + tol)
#print("#Function calls::" + num_func_invoc + " OBJ: " + best_obj_val)
c = (rowSums(simplex) - simplex[,worst_index])/(nrow(simplex))
x_r = 2*c - simplex[,worst_index]
obj_x_r = arima_css(x_r, Z, p, P, q, Q, s, solver)
num_func_invoc += 1
if(obj_x_r < best_obj_val){
x_e = 2*x_r - c
obj_x_e = arima_css(x_e, Z, p, P, q, Q, s, solver)
num_func_invoc = num_func_invoc + 1
simplex[,worst_index] = ifelse (obj_x_r <= obj_x_e, x_r, x_e)
objvals[1,worst_index] = ifelse (obj_x_r <= obj_x_e, obj_x_r, obj_x_e)
}
else {
if(obj_x_r < worst_obj_val){
simplex[,worst_index] = x_r
objvals[1,worst_index] = obj_x_r
}
x_c_in = (simplex[,worst_index] + c)/2
obj_x_c_in = arima_css(x_c_in, Z, p, P, q, Q, s, solver)
num_func_invoc += 1
if(obj_x_c_in < as.scalar(objvals[1,worst_index])){
simplex[,worst_index] = x_c_in
objvals[1,worst_index] = obj_x_c_in
}
else if(obj_x_r >= worst_obj_val){
best_point = simplex[,best_index]
parfor(i4 in 1:ncol(simplex)){
if(i4 != best_index){
simplex[,i4] = (simplex[,i4] + best_point)/2
objvals[1,i4] = arima_css(simplex[,i4], Z, p, P, q, Q, s, solver)
}
}
num_func_invoc += ncol(simplex) - 1
}
}
}
best_point = simplex[,best_index]
if(include_mean)
best_point = rbind(best_point, as.matrix(mu))
}
# changing to additive sar since R's arima seems to do that
arima_css = function(Matrix[Double] w, Matrix[Double] X,
Integer pIn, Integer P, Integer qIn, Integer Q, Integer s, String solver)
return (Double obj)
{
b = X[,2:ncol(X)]%*%w
R = matrix(0, nrow(X), nrow(X))
for(i7 in seq(1, qIn, 1)){
d_ns = matrix(as.scalar(w[P+pIn+i7,1]), nrow(R)-i7, 1)
R[1+i7:nrow(R),1:ncol(R)-i7] = R[1+i7:nrow(R),1:ncol(R)-i7] + diag(d_ns)
}
for(i8 in seq(1, Q, 1)){
err_ind_s = s*i8
d_s = matrix(as.scalar(w[P+pIn+qIn+i8,1]), nrow(R)-err_ind_s, 1)
R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] = R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] + diag(d_s)
}
#TODO: provide default dml "solve()" as well
solution = eval(solver + "_solver", R, b, 0.01, 100)
errs = X[,1] - solution
obj = sum(errs*errs)
}
cg_solver = function (Matrix[Double] R, Matrix[Double] B, Double tolerance, Integer max_iterations)
return (Matrix[Double] y_hat)
{
y_hat = matrix(0, nrow(R), 1)
iter = 0
A = R + diag(matrix(1, rows=nrow(R), cols=1))
Z = t(A)%*%A
r = -(t(A)%*%B)
p = -r
norm_r2 = sum(r^2)
continue = (norm_r2 != 0)
while(iter < max_iterations & continue){
q = Z%*%p
alpha = norm_r2 / as.scalar(t(p) %*% q)
y_hat += alpha * p
r += alpha * q
old_norm_r2 = norm_r2
norm_r2 = sum(r^2)
continue = (norm_r2 >= tolerance)
beta = norm_r2 / old_norm_r2
p = -r + beta * p
iter += 1
}
}
jacobi_solver = function (Matrix[Double] A, Matrix[Double] B, Double tolerance, Integer max_iterations)
return (Matrix[Double] y_hat)
{
y_hat = matrix(0, nrow(A), 1)
iter = 0
diff = tolerance+1
#checking for strict diagonal dominance
#required for jacobi's method
check = sum(rowSums(abs(A)) >= 1)
if(check > 0)
print("The matrix is not diagonal dominant. Suggest switching to an exact solver.")
while(iter < max_iterations & diff > tolerance){
y_hat_new = B - A%*%y_hat
diff = sum((y_hat_new-y_hat)^2)
y_hat = y_hat_new
iter += 1
}
}