[SYSTEMDS-2773] New arima built-in function (time series forecasting)

DIA project WS2020/21.
Closes #1137.
diff --git a/scripts/builtin/arima.dml b/scripts/builtin/arima.dml
new file mode 100644
index 0000000..56dd6a1
--- /dev/null
+++ b/scripts/builtin/arima.dml
@@ -0,0 +1,251 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Builtin function that implements ARIMA
+#
+# INPUT PARAMETERS:
+# ----------------------------------------------------------------------------
+# NAME              TYPE      DEFAULT   MEANING
+# ----------------------------------------------------------------------------
+# X                 Double    ---       The input Matrix to apply Arima on.
+# max_func_invoc    Int       1000      ?
+# p                 Int       0         non-seasonal AR order
+# d                 Int       0         non-seasonal differencing order
+# q                 Int       0         non-seasonal MA order
+# P                 Int       0         seasonal AR order
+# D                 Int       0         seasonal differencing order
+# Q                 Int       0         seasonal MA order
+# s                 Int       1         period in terms of number of time-steps
+# include_mean      Boolean   FALSE     center to mean 0, and include in result
+# solver            String    jacobi    solver, is either "cg" or "jacobi"
+#
+# RETURN VALUES
+# ----------------------------------------------------------------------------
+# NAME         TYPE   DEFAULT  MEANING
+# ----------------------------------------------------------------------------
+# best_point  String    ---    The calculated coefficients
+# ----------------------------------------------------------------------------
+
+m_arima = function(Matrix[Double] X, Integer max_func_invoc=1000, Integer p=0,
+  Integer d=0, Integer q=0, Integer P=0, Integer D=0, Integer Q=0, Integer s=1,
+  Boolean include_mean=FALSE, String solver="jacobi")
+  return (Matrix[Double] best_point)
+{
+  totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols
+  #print ("totcols=" + totcols)
+
+  #TODO: check max_func_invoc < totcols --> print warning (stop here ??)
+
+  num_rows = nrow(X)
+  #print("nrows of X: " + num_rows)
+  if(num_rows <= d)
+    print("non-seasonal differencing order should be smaller than length of the time-series")
+
+  mu = 0.0
+  if(include_mean == 1){
+    mu = mean(X)
+    X = X - mu
+  }
+
+  # d-th order differencing:
+  for(i in seq(1,d,1))
+    X = X[2:nrow(X),] - X[1:nrow(X)-1,]
+
+  num_rows = nrow(X)
+  if(num_rows <= s*D)
+    print("seasonal differencing order should be smaller than number of observations divided by length of season")
+
+  for(i in seq(1,D,1)){
+    n1 = nrow(X)+0.0
+    X = X[s+1:n1,] - X[1:n1-s,]
+  }
+
+  n = nrow(X)
+
+  #Matrix Z with target values of prediction (X) in first column and
+  #all values that can be used to predict a this target value in column 2:totcols of same row
+  Z = cbind(X, matrix(0, n, totcols - 1))
+
+  #TODO: This operations can be optimised/simplified
+
+  #fills Z with values used for non seasonal AR prediction
+  parfor(i1 in seq(1, p, 1), check=0){
+    Z[i1+1:n,1+i1] = X[1:n-i1,]
+  }
+
+  #prediciton values for seasonal AR
+  parfor(i2 in seq(1, P, 1), check=0){
+    Z[s*i2+1:n,1+p+i2] = X[1:n-s*i2,]
+  }
+
+  #prediciton values for non seasonal MA
+  parfor(i5 in seq(1, q, 1), check=0){
+    Z[i5+1:n,1+P+p+i5] = X[1:n-i5,]
+  }
+
+  #prediciton values for seasonal MA
+  parfor(i6 in seq(1,Q, 1), check=0){
+    Z[s*i6+1:n,1+P+p+q+i6] = X[1:n-s*i6,]
+  }
+
+  simplex = cbind(matrix(0, totcols-1, 1), diag(matrix(0.1, totcols-1, 1)))
+
+  num_func_invoc = 0
+
+  objvals = matrix(0, 1, ncol(simplex))
+  parfor(i3 in seq(1,ncol(simplex))){
+    objvals[1,i3] = arima_css(simplex[,i3], Z, p, P, q, Q, s, solver)
+  }
+
+  num_func_invoc += ncol(simplex)
+  #print ("num_func_invoc = " + num_func_invoc)
+  tol = 1.5 * 10^(-8) * as.scalar(objvals[1,1])
+
+  continue = TRUE
+  while(continue & num_func_invoc <= max_func_invoc){
+    best_index = as.scalar(rowIndexMin(objvals))
+    worst_index = as.scalar(rowIndexMax(objvals))
+
+    best_obj_val = as.scalar(objvals[1,best_index])
+    worst_obj_val = as.scalar(objvals[1,worst_index])
+
+    continue = (worst_obj_val > best_obj_val + tol)
+
+    #print("#Function calls::" + num_func_invoc + " OBJ: " + best_obj_val)
+
+    c = (rowSums(simplex) - simplex[,worst_index])/(nrow(simplex))
+
+    x_r = 2*c - simplex[,worst_index]
+    obj_x_r = arima_css(x_r, Z, p, P, q, Q, s, solver)
+    num_func_invoc += 1
+
+    if(obj_x_r < best_obj_val){
+      x_e = 2*x_r - c
+      obj_x_e = arima_css(x_e, Z, p, P, q, Q, s, solver)
+      num_func_invoc = num_func_invoc + 1
+
+      simplex[,worst_index] = ifelse (obj_x_r <= obj_x_e, x_r, x_e)
+      objvals[1,worst_index] = ifelse (obj_x_r <= obj_x_e, obj_x_r, obj_x_e)
+    }
+    else {
+      if(obj_x_r < worst_obj_val){
+        simplex[,worst_index] = x_r
+        objvals[1,worst_index] = obj_x_r
+      }
+
+      x_c_in = (simplex[,worst_index] + c)/2
+      obj_x_c_in = arima_css(x_c_in, Z, p, P, q, Q, s, solver)
+      num_func_invoc += 1
+
+      if(obj_x_c_in < as.scalar(objvals[1,worst_index])){
+        simplex[,worst_index] = x_c_in
+        objvals[1,worst_index] = obj_x_c_in
+      }
+      else if(obj_x_r >= worst_obj_val){
+        best_point = simplex[,best_index]
+        parfor(i4 in 1:ncol(simplex)){
+          if(i4 != best_index){
+            simplex[,i4] = (simplex[,i4] + best_point)/2
+            objvals[1,i4] = arima_css(simplex[,i4], Z, p, P, q, Q, s, solver)
+          }
+        }
+        num_func_invoc += ncol(simplex) - 1
+      }
+    }
+  }
+
+  best_point = simplex[,best_index]
+  if(include_mean)
+    best_point = rbind(best_point, as.matrix(mu))
+}
+
+ # changing to additive sar since R's arima seems to do that
+arima_css = function(Matrix[Double] w, Matrix[Double] X,
+  Integer pIn, Integer P, Integer qIn, Integer Q, Integer s, String solver) 
+  return (Double obj)
+{
+  b = X[,2:ncol(X)]%*%w
+  R = matrix(0, nrow(X), nrow(X))
+  for(i7 in seq(1, qIn, 1)){
+    d_ns = matrix(as.scalar(w[P+pIn+i7,1]), nrow(R)-i7, 1)
+    R[1+i7:nrow(R),1:ncol(R)-i7] = R[1+i7:nrow(R),1:ncol(R)-i7] + diag(d_ns)
+  }
+
+  for(i8 in seq(1, Q, 1)){
+    err_ind_s = s*i8
+    d_s = matrix(as.scalar(w[P+pIn+qIn+i8,1]), nrow(R)-err_ind_s, 1)
+    R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] = R[1+err_ind_s:nrow(R),1:ncol(R)-err_ind_s] + diag(d_s)
+  }
+
+  #TODO: provide default dml "solve()" as well
+  solution = eval(solver + "_solver", R, b, 0.01, 100)
+
+  errs = X[,1] - solution
+  obj = sum(errs*errs)
+}
+
+cg_solver = function (Matrix[Double] R, Matrix[Double] B, Double tolerance, Integer max_iterations)
+  return (Matrix[Double] y_hat)
+{
+  y_hat = matrix(0, nrow(R), 1)
+  iter = 0
+
+  A = R + diag(matrix(1, rows=nrow(R), cols=1))
+  Z = t(A)%*%A
+  r = -(t(A)%*%B)
+  p = -r
+  norm_r2 = sum(r^2)
+
+  continue = (norm_r2 != 0)
+  while(iter < max_iterations & continue){
+    q = Z%*%p
+    alpha = norm_r2 / as.scalar(t(p) %*% q)
+    y_hat += alpha * p
+    r += alpha * q
+    old_norm_r2 = norm_r2
+    norm_r2 = sum(r^2)
+    continue = (norm_r2 >= tolerance)
+    beta = norm_r2 / old_norm_r2
+    p = -r + beta * p
+    iter += 1
+  }
+}
+
+jacobi_solver = function (Matrix[Double] A, Matrix[Double] B, Double tolerance, Integer max_iterations)
+  return (Matrix[Double] y_hat)
+{
+  y_hat = matrix(0, nrow(A), 1)
+  iter = 0
+  diff = tolerance+1
+
+  #checking for strict diagonal dominance
+  #required for jacobi's method
+  check = sum(rowSums(abs(A)) >= 1)
+  if(check > 0)
+    print("The matrix is not diagonal dominant. Suggest switching to an exact solver.")
+
+  while(iter < max_iterations & diff > tolerance){
+    y_hat_new = B - A%*%y_hat
+    diff = sum((y_hat_new-y_hat)^2)
+    y_hat = y_hat_new
+    iter += 1
+  }
+}
diff --git a/src/main/java/org/apache/sysds/common/Builtins.java b/src/main/java/org/apache/sysds/common/Builtins.java
index c63fa45..bb96100 100644
--- a/src/main/java/org/apache/sysds/common/Builtins.java
+++ b/src/main/java/org/apache/sysds/common/Builtins.java
@@ -38,6 +38,7 @@
  */
 public enum Builtins {
 	//builtin functions
+	ARIMA("arima", true),
 	ABS("abs", false),
 	GET_ACCURACY("getAccuracy", true),
 	ABSTAIN("abstain", true),
diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinArimaTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinArimaTest.java
new file mode 100644
index 0000000..f6f9d2e
--- /dev/null
+++ b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinArimaTest.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.functions.builtin;
+
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+
+import org.junit.Test;
+import org.apache.sysds.common.Types;
+import org.apache.sysds.lops.LopProperties;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+import org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex;
+import org.apache.sysds.runtime.meta.MatrixCharacteristics;
+import org.apache.sysds.test.AutomatedTestBase;
+import org.apache.sysds.test.TestConfiguration;
+import org.apache.sysds.test.TestUtils;
+
+@RunWith(value = Parameterized.class)
+public class BuiltinArimaTest extends AutomatedTestBase {
+	private final static String TEST_NAME = "arima";
+	private final static String TEST_DIR = "functions/builtin/";
+	private static final String TEST_CLASS_DIR = TEST_DIR + BuiltinArimaTest.class.getSimpleName() + "/";
+
+	protected int max_func_invoc, p, d, q, P, D, Q, s, include_mean, useJacobi;
+
+	public BuiltinArimaTest(int m, int p, int d, int q, int P, int D, int Q, int s, int include_mean, int useJacobi){
+		this.max_func_invoc = m;
+		this.p = p;
+		this.d = d;
+		this.q = q;
+		this.P = P;
+		this.D = D;
+		this.Q = Q;
+		this.s = s;
+		this.include_mean = include_mean;
+		this.useJacobi = useJacobi;
+	}
+
+	@Parameters
+	public static Collection<Object[]> data() {
+		return Arrays.asList(new Object[][] {
+				{20, 1, 0, 0, 0, 0, 0, 24, 1, 1},
+				{20, 0, 0, 1, 0, 0, 0, 24, 1, 1},
+				{20, 2, 0, 1, 0, 0, 0, 24, 1, 1},
+				
+				//TODO fix remaining configurations (e.g., differencing)
+				//{10, 1, 0, 10, 0, 0, 0, 24, 1, 1}
+				// {10, 1, 1, 2, 0, 0, 0, 24, 1, 1},
+				// {10, 0, 1, 2, 0, 0, 0, 24, 1, 1},
+				// {10, 0, 0, 0, 1, 1, 0, 24, 1, 1},
+				// {10, 0, 0, 0, 1, 1, 2, 24, 1, 1},
+				// {10, 0, 0, 0, 0, 1, 2, 24, 1, 1}}
+		});
+	}
+
+	@Override
+	public void setUp() {
+		addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[]{"B"}));
+	}
+
+	@Test
+	public void testArima(){
+		Types.ExecMode platformOld = setExecMode(LopProperties.ExecType.CP);
+		try {
+			loadTestConfiguration(getTestConfiguration(TEST_NAME));
+			String HOME = SCRIPT_DIR + TEST_DIR;
+			fullDMLScriptName = HOME + TEST_NAME + ".dml";
+			fullRScriptName = HOME + TEST_NAME + ".R";
+
+			programArgs = new String[]{
+					"-nvargs", "X=" + input("col.mtx"), "max_func_invoc=" + max_func_invoc,
+					"p=" + p, "d=" + d, "q=" + q, "P=" + P, "D=" + D, "Q=" + Q,
+					"s=" + s, "include_mean=" + include_mean, "useJacobi=" + useJacobi,
+					"model=" + output("learnt.model"),};
+
+			rCmd = getRCmd(input("col.mtx"), Integer.toString(max_func_invoc), Integer.toString(p),
+				Integer.toString(d), Integer.toString(q), Integer.toString(P), Integer.toString(D),
+				Integer.toString(Q), Integer.toString(s), Integer.toString(include_mean),
+				Integer.toString(useJacobi), expected("learnt.model"));
+
+			int timeSeriesLength = 3000;
+			double[][] timeSeries = getRandomMatrix(timeSeriesLength, 1, 1, 5, 0.9, 54321);
+
+			MatrixCharacteristics mc = new MatrixCharacteristics(timeSeriesLength,1,-1,-1);
+			writeInputMatrixWithMTD("col", timeSeries, true, mc);
+
+			runTest(true, false, null, -1);
+			runRScript(true);
+
+			double tol = Math.pow(10, -14);
+			HashMap<CellIndex, Double> arima_model_R = readRMatrixFromExpectedDir("learnt.model");
+			HashMap<CellIndex, Double> arima_model_SYSTEMDS= readDMLMatrixFromOutputDir("learnt.model");
+			TestUtils.compareMatrices(arima_model_R, arima_model_SYSTEMDS, tol, "arima_R", "arima_SYSTEMDS");
+		}
+		catch(Exception ex) {
+			ex.printStackTrace();
+		}
+		finally {
+			rtplatform = platformOld;
+		}
+	}
+}
diff --git a/src/test/scripts/applications/arima_box-jenkins/arima.dml b/src/test/scripts/applications/arima_box-jenkins/arima.dml
index ab56d39..948a560 100644
--- a/src/test/scripts/applications/arima_box-jenkins/arima.dml
+++ b/src/test/scripts/applications/arima_box-jenkins/arima.dml
@@ -288,7 +288,7 @@
 }
 
 result_format = ifdef($result_format, "csv")
-write(best_point, dest, format=result_format)
+write(best_point, dest)
 
  
 debug_printMatrixShape = function (Matrix[Double] M, String matrixIdentifier){
diff --git a/src/test/scripts/functions/builtin/arima.R b/src/test/scripts/functions/builtin/arima.R
new file mode 100644
index 0000000..a49ae08
--- /dev/null
+++ b/src/test/scripts/functions/builtin/arima.R
@@ -0,0 +1,299 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+#----------------------------------------
+# -------- r's arima function -----------
+# library("Matrix")
+# args = commandArgs(TRUE)
+# path_to_x = args[1]
+# max_func_invoc = as.integer(args[2])
+# p = as.integer(args[3])
+# d = as.integer(args[4])
+# q = as.integer(args[5])
+# P = as.integer(args[6])
+# D = as.integer(args[7])
+# Q = as.integer(args[8])
+# s = as.integer(args[9])
+# include_mean = as.integer(args[10])
+# solver = args[11]
+#
+# X = as.matrix(readMM(path_to_x))
+# model = arima(ts(X), order=c(p,d,q), seasonal=list(order=c(p,d,q),period=s),include.mean=include_mean,method="CSS")
+# print(coef(model))
+#----------------------------------------
+
+
+#----------------------------------------
+# ------- copied test function ----------
+args <- commandArgs(TRUE)
+library(Matrix)
+
+arima_css = function(w, X, p, P, q, Q, s, useJacobi){
+  b = matrix(X[,2:ncol(X)], nrow(X), ncol(X)-1)%*%w
+
+  R = matrix(0, nrow(X), nrow(X))
+  if(q>0){
+    for(i7 in 1:q){
+      ma_ind_ns = P+p+i7
+      err_ind_ns = i7
+      ones_ns = rep(1, nrow(R)-err_ind_ns)
+      d_ns = ones_ns * w[ma_ind_ns,1]
+      R[(1+err_ind_ns):nrow(R),1:(ncol(R)-err_ind_ns)] = R[(1+err_ind_ns):nrow(R),1:(ncol(R)-err_ind_ns)] + diag(d_ns)
+    }
+  }
+  if(Q>0){
+    for(i8 in 1:Q){
+      ma_ind_s = P+p+q+i8
+      err_ind_s = s*i8
+      ones_s = rep(1, nrow(R)-err_ind_s)
+      d_s = ones_s * w[ma_ind_s,1]
+      R[(1+err_ind_s):nrow(R),1:(ncol(R)-err_ind_s)] = R[(1+err_ind_s):nrow(R),1:(ncol(R)-err_ind_s)] + diag(d_s)
+    }
+  }
+
+  max_iter = 100
+  tol = 0.01
+
+  y_hat = matrix(0, nrow(X), 1)
+  iter = 0
+
+  if(useJacobi == 1){
+    check = sum(ifelse(rowSums(abs(R)) >= 1, 1, 0))
+    if(check > 0){
+      print("R is not diagonal dominant. Suggest switching to an exact solver.")
+    }
+    diff = tol+1.0
+    while(iter < max_iter & diff > tol){
+      y_hat_new = matrix(b - R%*%y_hat, nrow(y_hat), 1)
+      diff = sum((y_hat_new-y_hat)*(y_hat_new-y_hat))
+      y_hat = y_hat_new
+      iter = iter + 1
+    }
+  }else{
+    ones = rep(1, nrow(X))
+    A = R + diag(ones)
+    Z = t(A)%*%A
+    y = t(A)%*%b
+    r = -y
+    p = -r
+    norm_r2 = sum(r*r)
+    while(iter < max_iter & norm_r2 > tol){
+      q = Z%*%p
+      alpha = norm_r2 / sum(p*q)
+      y_hat = y_hat + alpha * p
+      old_norm_r2 = norm_r2
+      r = r + alpha * q
+      norm_r2 = sum(r * r)
+      beta = norm_r2 / old_norm_r2
+      p = -r + beta * p
+      iter = iter + 1
+    }
+  }
+
+  errs = X[,1] - y_hat
+  obj = sum(errs*errs)
+
+  return(obj)
+}
+
+#input col of time series data
+X = readMM(args[1])
+
+max_func_invoc = as.integer(args[2])
+
+#non-seasonal order
+p = as.integer(args[3])
+d = as.integer(args[4])
+q = as.integer(args[5])
+
+#seasonal order
+P = as.integer(args[6])
+D = as.integer(args[7])
+Q = as.integer(args[8])
+
+#length of the season
+s = as.integer(args[9])
+
+include_mean = as.integer(args[10])
+
+useJacobi = as.integer(args[11])
+
+num_rows = nrow(X)
+
+if(num_rows <= d){
+  print("non-seasonal differencing order should be larger than length of the time-series")
+}
+
+Y = matrix(X[,1], nrow(X), 1)
+if(d>0){
+  for(i in 1:d){
+    n1 = nrow(Y)
+    Y = matrix(Y[2:n1,] - Y[1:(n1-1),], n1-1, 1)
+  }
+}
+
+num_rows = nrow(Y)
+if(num_rows <= s*D){
+  print("seasonal differencing order should be larger than number of observations divided by length of season")
+}
+
+if(D>0){
+  for(i in 1:D){
+    n1 = nrow(Y)
+    Y = matrix(Y[(s+1):n1,] - Y[1:(n1-s),], n1-s, 1)
+  }
+}
+
+n = nrow(Y)
+
+max_ar_col = P+p
+max_ma_col = Q+q
+if(max_ar_col > max_ma_col){
+  max_arma_col = max_ar_col
+}else{
+  max_arma_col = max_ma_col
+}
+
+mu = 0
+if(include_mean == 1){
+  mu = sum(Y)/nrow(Y)
+  Y = Y - mu
+}
+
+totcols = 1+p+P+Q+q #target col (X), p-P cols, q-Q cols
+
+Z = matrix(0, n, totcols)
+Z[,1] = Y #target col
+
+if(p>0){
+  for(i1 in 1:p){
+    Z[(i1+1):n,1+i1] = Y[1:(n-i1),]
+  }
+}
+if(P>0){
+  for(i2 in 1:P){
+    Z[(s*i2+1):n,1+p+i2] = Y[1:(n-s*i2),]
+  }
+}
+if(q>0){
+  for(i5 in 1:q){
+    Z[(i5+1):n,1+P+p+i5] = Y[1:(n-i5),]
+  }
+}
+if(Q>0){
+  for(i6 in 1:Q){
+    Z[(s*i6+1):n,1+P+p+q+i6] = Y[1:(n-s*i6),]
+  }
+}
+
+simplex = matrix(0, totcols-1, totcols)
+for(i in 2:ncol(simplex)){
+  simplex[i-1,i] = 0.1
+}
+
+num_func_invoc = 0
+
+objvals = matrix(0, 1, ncol(simplex))
+for(i3 in 1:ncol(simplex)){
+  objvals[1,i3] = arima_css(matrix(simplex[,i3], nrow(simplex), 1), Z, p, P, q, Q, s, useJacobi)
+}
+num_func_invoc = num_func_invoc + ncol(simplex)
+
+tol = 1.5 * 10^(-8) * objvals[1,1]
+
+continue = TRUE
+while(continue == 1 & num_func_invoc <= max_func_invoc) {
+  #print(paste(num_func_invoc, max_func_invoc))
+  best_index = 1
+  worst_index = 1
+  for(i in 2:ncol(objvals)){
+    this = objvals[1,i]
+    that = objvals[1,best_index]
+    if(that > this){
+      best_index = i
+    }
+    that = objvals[1,worst_index]
+    if(that < this){
+      worst_index = i
+    }
+  }
+
+  best_obj_val = objvals[1,best_index]
+  worst_obj_val = objvals[1,worst_index]
+  continue = (worst_obj_val > best_obj_val + tol)
+
+  #print(paste("#Function calls::", num_func_invoc, "OBJ:", best_obj_val))
+
+  c = (rowSums(simplex) - simplex[,worst_index])/(nrow(simplex))
+
+  x_r = 2*c - simplex[,worst_index]
+  obj_x_r = arima_css(matrix(x_r, nrow(simplex), 1), Z, p, P, q, Q, s, useJacobi)
+  num_func_invoc = num_func_invoc + 1
+
+  if(obj_x_r < best_obj_val){
+    x_e = 2*x_r - c
+    obj_x_e = arima_css(matrix(x_e, nrow(simplex), 1), Z, p, P, q, Q, s, useJacobi)
+    num_func_invoc = num_func_invoc + 1
+
+    simplex[,worst_index] = ifelse (obj_x_r <= obj_x_e, x_r, x_e)
+    objvals[1,worst_index] = ifelse (obj_x_r <= obj_x_e, obj_x_r, obj_x_e)
+    
+  } else {
+    if(obj_x_r < worst_obj_val){
+      simplex[,worst_index] = x_r
+      objvals[1,worst_index] = obj_x_r
+    }
+
+    x_c_in = (simplex[,worst_index] + c)/2
+    obj_x_c_in = arima_css(matrix(x_c_in, nrow(simplex), 1), Z, p, P, q, Q, s, useJacobi)
+    num_func_invoc = num_func_invoc + 1
+
+    if(obj_x_c_in < objvals[1,worst_index]){
+      simplex[,worst_index] = x_c_in
+      objvals[1,worst_index] = obj_x_c_in
+    }else{
+      if(obj_x_r >= worst_obj_val){
+        best_point = simplex[,best_index]
+
+        for(i4 in 1:ncol(simplex)){
+          if(i4 != best_index){
+            simplex[,i4] = (simplex[,i4] + best_point)/2
+            objvals[1,i4] = arima_css(matrix(simplex[,i4], nrow(simplex), 1), Z, p, P, q, Q, s, useJacobi)
+          }
+        }
+        num_func_invoc = num_func_invoc + ncol(simplex) - 1
+      }
+    }
+  }
+}
+
+best_point = matrix(simplex[,best_index], nrow(simplex), 1)
+#(simplex)
+#print(best_point)
+if(include_mean == 1){
+  tmp = matrix(0, totcols, 1)
+  tmp[1:nrow(best_point),1] = best_point
+  tmp[nrow(tmp),1] = mu
+  best_point = tmp
+}
+
+writeMM(as(best_point, "CsparseMatrix"), args[12])
diff --git a/src/test/scripts/functions/builtin/arima.dml b/src/test/scripts/functions/builtin/arima.dml
new file mode 100644
index 0000000..8802b87
--- /dev/null
+++ b/src/test/scripts/functions/builtin/arima.dml
@@ -0,0 +1,28 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = read($X)
+solver = ifelse($useJacobi, "jacobi", "cg")
+
+coefficients = arima(X=X, max_func_invoc=$max_func_invoc, p=$p, d=$d, q=$q, 
+  P=$P, D=$D, Q=$Q, s=$s, include_mean=$include_mean, solver=solver)
+
+write(coefficients, $model)