[SYSTEMDS-2772] Add Autoencoder (2-layer) builtin
DIA project WS2020/21.
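
A minimal usage sketch of the new builtin from DML (sizes and hyperparameter
values are illustrative; see the script header for the full parameter list):

  X = rand(rows=1000, cols=784)
  [W1, b1, W2, b2, W3, b3, W4, b4, H] = autoencoder_2layer(X=X,
    num_hidden1=200, num_hidden2=20, max_epochs=10,
    full_obj=FALSE, batch_size=256, step=1e-5, decay=0.95, mu=0.9)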
diff --git a/scripts/builtin/autoencoder_2layer.dml b/scripts/builtin/autoencoder_2layer.dml
new file mode 100644
index 0000000..1f54604
--- /dev/null
+++ b/scripts/builtin/autoencoder_2layer.dml
@@ -0,0 +1,234 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Trains a 2-layer autoencoder with minibatch SGD, momentum, and step-size decay.
+# If invoked with H1 > H2, this becomes a 'bowtie'-structured autoencoder.
+# Weights are initialized using the Glorot & Bengio (2010, AISTATS) initialization.
+# The script standardizes the input before training and randomly reshuffles
+# the rows before training.
+# Currently, tanh is the activation function; to change the activation,
+# re-implement the DML-bodied function 'func'.
+
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME TYPE DEFAULT MEANING
+# ---------------------------------------------------------------------------------------------
+# X        Matrix  ---     Input data matrix
+# H1 Int --- Number of neurons in the 1st hidden layer
+# H2 Int --- Number of neurons in the 2nd hidden layer
+# EPOCH Int --- Number of epochs to train for
+# OBJ      Boolean FALSE   If TRUE, computes the objective function value (squared loss)
+#                          at the end of each epoch. Note that computing the full
+#                          objective can take a lot of time.
+# BATCH Int 256 Mini-batch size (training parameter)
+# STEP Double 1e-5 Initial step size (training parameter)
+# DECAY Double 0.95 Decays step size after each epoch (training parameter)
+# MOMENTUM Double 0.9 Momentum parameter (training parameter)
+#
+# W1_rand  Matrix  empty   Optional fixed matrix to initialize W1 (e.g., for testing);
+#                          if empty, weights are sampled from U(-1,1)
+# W2_rand  Matrix  empty   Optional fixed matrix to initialize W2
+# W3_rand  Matrix  empty   Optional fixed matrix to initialize W3
+# W4_rand  Matrix  empty   Optional fixed matrix to initialize W4
+# order_rand Matrix empty  Optional fixed random vector used for the row reshuffle
+# ---------------------------------------------------------------------------------------------
+# OUTPUT:
+# ---------------------------------------------------------------------------------------------
+# NAME TYPE DEFAULT MEANING
+# ---------------------------------------------------------------------------------------------
+# W1_out Matrix[Double] --- Matrix storing weights between input layer and 1st hidden layer
+# b1_out Matrix[Double] --- Matrix storing bias between input layer and 1st hidden layer
+# W2_out Matrix[Double] --- Matrix storing weights between 1st hidden layer and 2nd hidden layer
+# b2_out Matrix[Double] --- Matrix storing bias between 1st hidden layer and 2nd hidden layer
+# W3_out Matrix[Double] --- Matrix storing weights between 2nd hidden layer and 3rd hidden layer
+# b3_out Matrix[Double] --- Matrix storing bias between 2nd hidden layer and 3rd hidden layer
+# W4_out Matrix[Double] --- Matrix storing weights between 3rd hidden layer and output layer
+# b4_out Matrix[Double] --- Matrix storing bias between 3rd hidden layer and output layer
+# HIDDEN Matrix[Double] --- Matrix storing the hidden (2nd) layer representation if needed
+# ---------------------------------------------------------------------------------------------
+#
+
+
+m_autoencoder_2layer = function(Matrix[Double] X, Integer num_hidden1, Integer num_hidden2, Integer max_epochs,
+ Boolean full_obj = FALSE, Integer batch_size = 256, Double step = 1e-5, Double decay = 0.95, Double mu = 0.9,
+ Matrix[Double] W1_rand = matrix(0, rows=0, cols=0), Matrix[Double] W2_rand = matrix(0, rows=0, cols=0),
+ Matrix[Double] W3_rand = matrix(0, rows=0, cols=0), Matrix[Double] W4_rand = matrix(0, rows=0, cols=0),
+ Matrix[Double] order_rand = matrix(0, rows=0, cols=0))
+ return(Matrix[Double] W1, Matrix[Double] b1, Matrix[Double] W2, Matrix[Double] b2,
+ Matrix[Double] W3, Matrix[Double] b3, Matrix[Double] W4, Matrix[Double] b4, Matrix[Double] reordered_H)
+{
+ n = nrow(X)
+ m = ncol(X)
+ #randomly reordering rows
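+  #table(seq(1,n), idx, n, n) builds an n x n permutation matrix P with
+  #P[k, idx[k]] = 1, so P %*% X moves the row with the k-th smallest random
+  #key to position k, i.e., a uniform random shuffle of the rows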
+ if(nrow(order_rand) == 0 & ncol(order_rand) == 0)
+ permut = table(seq(1,n,1), order(target=Rand(rows=n, cols=1, min=0, max=1, pdf="uniform"), by=1, index.return=TRUE), n, n)
+ else
+ permut = table(seq(1,n,1), order(target=order_rand, by=1, index.return=TRUE), n, n)
+ X = permut %*% X
+
+  #z-transform (standardization); a whitening operator would be better
+ means = colSums(X)/n
+ stds = sqrt((colSums(X^2)/n - means*means)*n/(n-1)) + 1e-17
+ X = (X - means)/stds
+
+ if(nrow(W1_rand) == 0 & ncol(W1_rand) == 0)
+ W1_rand = Rand(rows=num_hidden1, cols=m, min=-1, max=1, pdf="uniform")
+ if(nrow(W2_rand) == 0 & ncol(W2_rand) == 0)
+ W2_rand = Rand(rows=num_hidden2, cols=num_hidden1, min=-1, max=1, pdf="uniform")
+ if(nrow(W3_rand) == 0 & ncol(W3_rand) == 0)
+ W3_rand = Rand(rows=num_hidden1, cols=num_hidden2, min=-1, max=1, pdf="uniform")
+ if(nrow(W4_rand) == 0 & ncol(W4_rand) == 0)
+ W4_rand = Rand(rows=m, cols=num_hidden1, min=-1, max=1, pdf="uniform")
+
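+  #Glorot/Xavier uniform initialization: scale the U(-1,1) samples by
+  #sqrt(6)/sqrt(fan_in + fan_out) per layer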
+ W1 = sqrt(6)/sqrt(m + num_hidden1) * W1_rand
+ b1 = matrix(0, rows=num_hidden1, cols=1)
+ W2 = sqrt(6)/sqrt(num_hidden1 + num_hidden2) * W2_rand
+ b2 = matrix(0, rows=num_hidden2, cols=1)
+ W3 = sqrt(6)/sqrt(num_hidden2 + num_hidden1) * W3_rand
+ b3 = matrix(0, rows=num_hidden1, cols=1)
+  W4 = sqrt(6)/sqrt(num_hidden1 + m) * W4_rand  #fan-in num_hidden1, fan-out m
+ b4 = matrix(0, rows=m, cols=1)
+
+ upd_W1 = matrix(0, rows=nrow(W1), cols=ncol(W1))
+ upd_b1 = matrix(0, rows=nrow(b1), cols=ncol(b1))
+ upd_W2 = matrix(0, rows=nrow(W2), cols=ncol(W2))
+ upd_b2 = matrix(0, rows=nrow(b2), cols=ncol(b2))
+ upd_W3 = matrix(0, rows=nrow(W3), cols=ncol(W3))
+ upd_b3 = matrix(0, rows=nrow(b3), cols=ncol(b3))
+ upd_W4 = matrix(0, rows=nrow(W4), cols=ncol(W4))
+ upd_b4 = matrix(0, rows=nrow(b4), cols=ncol(b4))
+
+ if( full_obj ){
+ [full_H1, full_H1_prime, full_H2, full_H2_prime, full_H3, full_H3_prime, full_Yhat,
+ full_Yhat_prime, full_E] = feedForward(X, W1, b1, W2, b2, W3, b3, W4, b4, X)
+ full_o = obj(full_E)
+ print("EPOCHS=" + 0 + " OBJ (FULL DATA): " + full_o)
+ }
+
+ iter = 0
+ num_iters_per_epoch = ceil(n / batch_size)
+ max_iterations = max_epochs * num_iters_per_epoch
+ #print("num_iters_per_epoch=" + num_iters_per_epoch + " max_iterations=" + max_iterations)
+ beg = 1
+ while( iter < max_iterations ){
+ end = beg + batch_size - 1
+ if(end > n) end = n
+ X_batch = X[beg:end,]
+
+ [H1, H1_prime, H2, H2_prime, H3, H3_prime, Yhat, Yhat_prime, E]
+ = feedForward(X_batch, W1, b1, W2, b2, W3, b3, W4, b4, X_batch)
+ [W1_grad, b1_grad, W2_grad, b2_grad, W3_grad, b3_grad, W4_grad, b4_grad]
+ = grad(X_batch, H1, H1_prime, H2, H2_prime, H3, H3_prime, Yhat_prime, E, W1, W2, W3, W4)
+
+ o = obj(E)
+ print("epochs=%5.4f BATCH beg=%d end=%d obj=%f", (iter / num_iters_per_epoch), beg, end, o)
+
+ #update
+ local_step = step / nrow(X_batch)
+    upd_W1 = mu * upd_W1 - local_step * W1_grad
+    upd_b1 = mu * upd_b1 - local_step * b1_grad
+    upd_W2 = mu * upd_W2 - local_step * W2_grad
+    upd_b2 = mu * upd_b2 - local_step * b2_grad
+    upd_W3 = mu * upd_W3 - local_step * W3_grad
+    upd_b3 = mu * upd_b3 - local_step * b3_grad
+    upd_W4 = mu * upd_W4 - local_step * W4_grad
+    upd_b4 = mu * upd_b4 - local_step * b4_grad
+ W1 = W1 + upd_W1
+ b1 = b1 + upd_b1
+ W2 = W2 + upd_W2
+ b2 = b2 + upd_b2
+ W3 = W3 + upd_W3
+ b3 = b3 + upd_b3
+ W4 = W4 + upd_W4
+ b4 = b4 + upd_b4
+
+ iter = iter + 1
+ if(end == n) beg = 1
+ else beg = end + 1
+
+ if( iter %% num_iters_per_epoch == 0 ) step = step * decay
+
+ if( full_obj & iter %% num_iters_per_epoch == 0 ){
+ [full_H1, full_H1_prime, full_H2, full_H2_prime, full_H3, full_H3_prime, full_Yhat,
+ full_Yhat_prime, full_E] = feedForward(X, W1, b1, W2, b2, W3, b3, W4, b4, X)
+ full_o = obj(full_E)
+ epochs = iter %/% num_iters_per_epoch
+ print("EPOCHS=" + epochs + " iter=" + iter + " OBJ (FULL DATA)=" + full_o)
+ }
+ }
+
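+  #final forward pass on the full data; t(permut) undoes the row shuffle
+  #(the transpose of a permutation matrix is its inverse)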
+ [full_H1, full_H1_prime, full_H2, full_H2_prime, full_H3, full_H3_prime, full_Yhat,
+ full_Yhat_prime, full_E] = feedForward(X, W1, b1, W2, b2, W3, b3, W4, b4, X)
+ reordered_H = t(permut) %*% full_H2
+}
+
+#implements tanh
+#to use another activation fn, implement a DML-bodied function with
+#function name 'func' and comment out this one
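+#(the derivative Y_prime = 1 - tanh(X)^2 is returned alongside the activation
+#so that backprop can reuse it without recomputation)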
+func = function(Matrix[Double] X) return(Matrix[Double] Y, Matrix[Double] Y_prime){
+ Y = (exp(2*X) - 1)/(exp(2*X) + 1)
+ Y_prime = 1 - Y^2
+}
+
+feedForward = function(Matrix[Double] X, Matrix[Double] W1, Matrix[Double] b1, Matrix[Double] W2, Matrix[Double] b2,
+ Matrix[Double] W3, Matrix[Double] b3, Matrix[Double] W4, Matrix[Double] b4, Matrix[Double] Y)
+ return(Matrix[Double] H1, Matrix[Double] H1_prime,Matrix[Double] H2, Matrix[Double] H2_prime,
+ Matrix[Double] H3, Matrix[Double] H3_prime, Matrix[Double] Yhat, Matrix[Double] Yhat_prime, Matrix[Double] E)
+{
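+  #activations are kept row-major (one row per example); the biases are column
+  #vectors, so W %*% t(X) + b broadcasts the bias across the batch before
+  #transposing back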
+ H1_in = t(W1 %*% t(X) + b1)
+ [H1, H1_prime] = func(H1_in)
+
+ H2_in = t(W2 %*% t(H1) + b2)
+ [H2, H2_prime] = func(H2_in)
+
+ H3_in = t(W3 %*% t(H2) + b3)
+ [H3, H3_prime] = func(H3_in)
+
+ Yhat_in = t(W4 %*% t(H3) + b4)
+ [Yhat, Yhat_prime] = func(Yhat_in)
+ E = Yhat - Y
+}
+
+grad = function(Matrix[Double] X, Matrix[Double] H1, Matrix[Double] H1_prime, Matrix[Double] H2, Matrix[Double] H2_prime,
+ Matrix[Double] H3, Matrix[Double] H3_prime, Matrix[Double] Yhat_prime, Matrix[Double] E,
+ Matrix[Double] W1, Matrix[Double] W2, Matrix[Double] W3, Matrix[Double] W4)
+ return(Matrix[Double] W1_grad, Matrix[Double] b1_grad,Matrix[Double] W2_grad, Matrix[Double] b2_grad,
+ Matrix[Double] W3_grad, Matrix[Double] b3_grad, Matrix[Double] W4_grad, Matrix[Double] b4_grad)
+{
+ #backprop
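+  #delta_l = f'(z_l) * (delta_(l+1) %*% W_(l+1)): the output error is pushed
+  #backwards through the tanh derivatives stored during the forward pass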
+ delta4 = E * Yhat_prime
+ delta3 = H3_prime * (delta4 %*% W4)
+ delta2 = H2_prime * (delta3 %*% W3)
+ delta1 = H1_prime * (delta2 %*% W2)
+
+ #compute gradients
+ b4_grad = t(colSums(delta4))
+ b3_grad = t(colSums(delta3))
+ b2_grad = t(colSums(delta2))
+ b1_grad = t(colSums(delta1))
+
+ W4_grad = t(delta4) %*% H3
+ W3_grad = t(delta3) %*% H2
+ W2_grad = t(delta2) %*% H1
+ W1_grad = t(delta1) %*% X
+}
+
+obj = function(Matrix[Double] E) return(Double val){
+ val = 0.5 * sum(E^2)
+}
diff --git a/src/main/java/org/apache/sysds/common/Builtins.java b/src/main/java/org/apache/sysds/common/Builtins.java
index 19cfa16..c63fa45 100644
--- a/src/main/java/org/apache/sysds/common/Builtins.java
+++ b/src/main/java/org/apache/sysds/common/Builtins.java
@@ -47,6 +47,7 @@
ALS_DS("alsDS", true),
ASIN("asin", false),
ATAN("atan", false),
+ AUTOENCODER2LAYER("autoencoder_2layer", true),
AVG_POOL("avg_pool", false),
AVG_POOL_BACKWARD("avg_pool_backward", false),
BATCH_NORM2D("batch_norm2d", false, ReturnType.MULTI_RETURN),
diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinAutoencoder2LayerTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinAutoencoder2LayerTest.java
new file mode 100644
index 0000000..81f93a5
--- /dev/null
+++ b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinAutoencoder2LayerTest.java
@@ -0,0 +1,205 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.functions.builtin;
+
+
+import org.apache.sysds.runtime.matrix.data.MatrixValue;
+import org.apache.sysds.test.AutomatedTestBase;
+import org.apache.sysds.test.TestUtils;
+import org.junit.Test;
+
+import java.util.HashMap;
+
+
+public class BuiltinAutoencoder2LayerTest extends AutomatedTestBase
+{
+ private final static String TEST_NAME = "autoencoder_2layer";
+ private final static String TEST_DIR = "functions/builtin/";
+ private static final String TEST_CLASS_DIR = TEST_DIR + BuiltinAutoencoder2LayerTest.class.getSimpleName() + "/";
+
+ private final static int rows = 1058;
+ private final static int cols = 784;
+ private final static double sparse = 0.1;
+ private final static double dense = 0.7;
+ private final static double tolerance = 1e-3;
+
+ private static int batchSize = 256;
+ private static double step = 1e-5;
+ private static double decay = 0.95;
+ private static double momentum = 0.9;
+ private static boolean obj = false;
+
+ @Override
+ public void setUp() {
+ TestUtils.clearAssertionInformation();
+ addTestConfiguration(TEST_CLASS_DIR, TEST_NAME);
+ }
+
+ public void restoreDefaultParams(){
+ batchSize = 256;
+ step = 1e-5;
+ decay = 0.95;
+ momentum = 0.9;
+ obj = false;
+ }
+
+ @Test
+ public void testAutoencoderSparse80To5() {
+ runAutoencoderTest(80, 5, 2, sparse);
+ }
+
+ @Test
+ public void testAutoencoderDense80To5() {
+ runAutoencoderTest(80, 5, 2, dense);
+ }
+
+ @Test
+ public void testAutoencoderSparse10To4() {
+ runAutoencoderTest(10, 4, 2, sparse);
+ }
+
+ @Test
+ public void testAutoencoderDense10To4() {
+ runAutoencoderTest(10, 4, 2, dense);
+ }
+
+ @Test
+ public void testAutoencoderSparse200To20FullObj() {
+ obj = true;
+ runAutoencoderTest(200, 20, 2, sparse);
+ restoreDefaultParams();
+ }
+
+ @Test
+ public void testAutoencoderDense120To10Batch512() {
+ batchSize = 512;
+ runAutoencoderTest(120, 10, 2, dense);
+ restoreDefaultParams();
+ }
+
+ @Test
+ public void testAutoencoderSparse200To12DecMomentum() {
+ momentum = 0.8;
+ runAutoencoderTest(200, 12, 2, sparse);
+ restoreDefaultParams();
+ }
+
+ @Test
+ public void testAutoencoderSparse200To12IncMomentum() {
+ momentum = 0.95;
+ runAutoencoderTest(200, 12, 2, sparse);
+ restoreDefaultParams();
+ }
+
+ @Test
+ public void testAutoencoderDense20To3DecDecay() {
+ decay = 0.85;
+ runAutoencoderTest(20, 3, 2, dense);
+ restoreDefaultParams();
+ }
+
+ @Test
+ public void testAutoencoderDense500To3FullObjBatch512IncStep() {
+ obj = true;
+ batchSize = 512;
+ step = 1e-4;
+ runAutoencoderTest(500, 3, 2, dense);
+ restoreDefaultParams();
+ }
+
+ @Test
+ public void testAutoencoderSparse354To7FullObjBatch512DecStepIncMomentumDecDecay() {
+ obj = true;
+ batchSize = 512;
+ step = 1e-6;
+ momentum = 0.95;
+ decay = 0.90;
+ runAutoencoderTest(354, 7, 2, sparse);
+ restoreDefaultParams();
+ }
+
+ private void runAutoencoderTest(int numHidden1, int numHidden2, int maxEpochs, double sparsity) {
+ loadTestConfiguration(getTestConfiguration(TEST_NAME));
+
+ String HOME = SCRIPT_DIR + TEST_DIR;
+ fullDMLScriptName = HOME + TEST_NAME + ".dml";
+ String fullObj = obj ? "TRUE" : "FALSE";
+ programArgs = new String[]{ "-stats", "-nvargs", "X="+input("X"),
+ "H1="+numHidden1, "H2="+numHidden2, "EPOCH="+maxEpochs, "BATCH="+batchSize,
+ "STEP="+step, "DECAY="+decay, "MOMENTUM="+momentum, "OBJ="+fullObj,
+ "W1_rand="+input("W1_rand"),"W2_rand="+input("W2_rand"),
+ "W3_rand="+input("W3_rand"), "W4_rand="+input("W4_rand"),
+ "order_rand="+input("order_rand"),
+ "W1_out="+output("W1_out"), "b1_out="+output("b1_out"),
+ "W2_out="+output("W2_out"), "b2_out="+output("b2_out"),
+ "W3_out="+output("W3_out"), "b3_out="+output("b3_out"),
+ "W4_out="+output("W4_out"), "b4_out="+output("b4_out"),
+ "hidden_out="+output("hidden_out")};
+ fullRScriptName = HOME + TEST_NAME + ".R";
+ rCmd = getRCmd(inputDir(), String.valueOf(numHidden1), String.valueOf(numHidden2), String.valueOf(maxEpochs),
+ String.valueOf(batchSize), String.valueOf(momentum), String.valueOf(step), String.valueOf(decay), fullObj, expectedDir());
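+		//note: the argument order above must match the positional parsing in
+		//autoencoder_2layer.R: inDir, H1, H2, EPOCH, BATCH, MOMENTUM, STEP, DECAY, OBJ, outDir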
+
+ double[][] X = getRandomMatrix(rows, cols, 0, 1, sparsity, 27);
+ writeInputMatrixWithMTD("X", X, true);
+
+		//generate random matrices for W1-W4 up front so that the same
+		//initialization is passed to both the DML and R scripts
+ double[][] W1_rand = getRandomMatrix(numHidden1, cols, 0, 1, sparsity, 800);
+ writeInputMatrixWithMTD("W1_rand", W1_rand, true);
+ double[][] W2_rand = getRandomMatrix(numHidden2, numHidden1, 0, 1, sparsity, 900);
+ writeInputMatrixWithMTD("W2_rand", W2_rand, true);
+ double[][] W3_rand = getRandomMatrix(numHidden1, numHidden2, 0, 1, sparsity, 589);
+ writeInputMatrixWithMTD("W3_rand", W3_rand, true);
+ double[][] W4_rand = getRandomMatrix(cols, numHidden1, 0, 1, sparsity, 150);
+ writeInputMatrixWithMTD("W4_rand", W4_rand, true);
+ double[][] order_rand = getRandomMatrix(rows, 1, 0, 1, sparsity, 143);
+ writeInputMatrixWithMTD("order_rand", order_rand, true); //for the permut operation on input X
+ runTest(true, false, null, -1);
+
+ runRScript(true);
+
+ //compare matrices
+ HashMap<MatrixValue.CellIndex, Double> w1OutDML = readDMLMatrixFromOutputDir("W1_out");
+ HashMap<MatrixValue.CellIndex, Double> b1OutDML = readDMLMatrixFromOutputDir("b1_out");
+ HashMap<MatrixValue.CellIndex, Double> w2OutDML = readDMLMatrixFromOutputDir("W2_out");
+ HashMap<MatrixValue.CellIndex, Double> b2OutDML = readDMLMatrixFromOutputDir("b2_out");
+ HashMap<MatrixValue.CellIndex, Double> w3OutDML = readDMLMatrixFromOutputDir("W3_out");
+ HashMap<MatrixValue.CellIndex, Double> b3OutDML = readDMLMatrixFromOutputDir("b3_out");
+ HashMap<MatrixValue.CellIndex, Double> w4OutDML = readDMLMatrixFromOutputDir("W4_out");
+ HashMap<MatrixValue.CellIndex, Double> b4OutDML = readDMLMatrixFromOutputDir("b4_out");
+
+ HashMap<MatrixValue.CellIndex, Double> w1OutR = readRMatrixFromExpectedDir("W1_out");
+ HashMap<MatrixValue.CellIndex, Double> b1OutR = readRMatrixFromExpectedDir("b1_out");
+ HashMap<MatrixValue.CellIndex, Double> w2OutR = readRMatrixFromExpectedDir("W2_out");
+ HashMap<MatrixValue.CellIndex, Double> b2OutR = readRMatrixFromExpectedDir("b2_out");
+ HashMap<MatrixValue.CellIndex, Double> w3OutR = readRMatrixFromExpectedDir("W3_out");
+ HashMap<MatrixValue.CellIndex, Double> b3OutR = readRMatrixFromExpectedDir("b3_out");
+ HashMap<MatrixValue.CellIndex, Double> w4OutR = readRMatrixFromExpectedDir("W4_out");
+ HashMap<MatrixValue.CellIndex, Double> b4OutR = readRMatrixFromExpectedDir("b4_out");
+
+ TestUtils.compareMatrices(w1OutDML, w1OutR, tolerance, "W1-DML", "W1-R");
+ TestUtils.compareMatrices(b1OutDML, b1OutR, tolerance, "b1-DML", "b1-R");
+ TestUtils.compareMatrices(w2OutDML, w2OutR, tolerance, "W2-DML", "W2-R");
+ TestUtils.compareMatrices(b2OutDML, b2OutR, tolerance, "b2-DML", "b2-R");
+ TestUtils.compareMatrices(w3OutDML, w3OutR, tolerance, "W3-DML", "W3-R");
+ TestUtils.compareMatrices(b3OutDML, b3OutR, tolerance, "b3-DML", "b3-R");
+ TestUtils.compareMatrices(w4OutDML, w4OutR, tolerance, "W4-DML", "W4-R");
+ TestUtils.compareMatrices(b4OutDML, b4OutR, tolerance, "b4-DML", "b4-R");
+ }
+}
\ No newline at end of file
diff --git a/src/test/scripts/functions/builtin/autoencoder_2layer.R b/src/test/scripts/functions/builtin/autoencoder_2layer.R
new file mode 100644
index 0000000..4dbabd3
--- /dev/null
+++ b/src/test/scripts/functions/builtin/autoencoder_2layer.R
@@ -0,0 +1,264 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+#
+# Trains a 2-layer autoencoder with minibatch SGD, momentum, and step-size decay.
+# If invoked with H1 > H2, this becomes a 'bowtie'-structured autoencoder.
+# Weights are initialized using the Glorot & Bengio (2010, AISTATS) initialization.
+# The script standardizes the input before training and randomly reshuffles
+# the rows before training.
+# Currently, tanh is the activation function; to change the activation,
+# re-implement the functions 'func' (activation) and 'func1' (derivative).
+
+# INPUT PARAMETERS (positional command-line arguments):
+# ---------------------------------------------------------------------------------------------
+# POS  NAME      TYPE     MEANING
+# ---------------------------------------------------------------------------------------------
+# 1    inDir     String   Directory holding X.mtx, W1_rand.mtx, ..., W4_rand.mtx, order_rand.mtx
+# 2    H1        Int      Number of neurons in the 1st hidden layer
+# 3    H2        Int      Number of neurons in the 2nd hidden layer
+# 4    EPOCH     Int      Number of epochs to train for
+# 5    BATCH     Int      Mini-batch size (training parameter)
+# 6    MOMENTUM  Double   Momentum parameter (training parameter)
+# 7    STEP      Double   Initial step size (training parameter)
+# 8    DECAY     Double   Decays step size after each epoch (training parameter)
+# 9    OBJ       Boolean  If TRUE, computes the full objective after each epoch
+# 10   outDir    String   Directory where W1_out, b1_out, ..., W4_out, b4_out are written
+# ---------------------------------------------------------------------------------------------
+# Example:
+# Rscript autoencoder_2layer.R in/ 500 2 10 256 0.9 1e-5 0.95 FALSE expected/
+
+args <- commandArgs(TRUE)
+library("Matrix")
+
+#tanh activation ('func') and its derivative ('func1')
+func = function(X){
+ Y = (exp(2*X) - 1)/(exp(2*X) + 1)
+ return(Y)
+}
+
+func1 = function(X) {
+ Y = (exp(2*X) - 1)/(exp(2*X) + 1)
+ Y_prime = 1 - Y^2
+ return(Y_prime)
+}
+
+#squared-loss objective
+
+obj <- function(E){
+ val = 0.5 * sum(E^2)
+ return(val)
+}
+
+
+X = readMM(paste(args[1], "X.mtx", sep=""));
+W1_rand = readMM(paste(args[1], "W1_rand.mtx", sep=""));
+W2_rand = readMM(paste(args[1], "W2_rand.mtx", sep=""));
+W3_rand = readMM(paste(args[1], "W3_rand.mtx", sep=""));
+W4_rand = readMM(paste(args[1], "W4_rand.mtx", sep=""));
+order_rand = readMM(paste(args[1], "order_rand.mtx", sep=""));
+
+num_hidden1 = as.integer(args[2]) #$H1
+num_hidden2 = as.integer(args[3]) #$H2
+max_epochs = as.integer(args[4]) #$EPOCH
+batch_size = as.integer(args[5]) #$BATCH
+
+mu = as.double(args[6])      #momentum
+step = as.double(args[7])    #initial step size
+decay = as.double(args[8])   #step-size decay per epoch
+fmt = "text"
+full_obj = as.logical(args[9])
+
+n = nrow(X)
+m = ncol(X)
+
+#randomly reordering rows
+#permut = table(seq(from=1,to=n,by=1), order(runif(n, min=0, max=1)))
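+#as in the DML script, table(seq, order(...)) materializes the row-permutation
+#matrix; the contingency table is coerced to a plain numeric matrix below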
+permut = table(seq(from=1,to=n,by=1), order(order_rand))
+permut = as.data.frame.matrix(permut)
+permut = data.matrix(permut)
+X = (permut %*% X)
+#z-transform, whitening operator is better
+means = t(as.matrix(colSums(X)))/n
+csx2 = t(as.matrix(colSums(X^2)))/n
+stds = sqrt((csx2 - means*means)*n/(n-1)) + 1e-17  #sample std, matching the DML script
+X = (X - matrix(1, nrow(X),1) %*% means)/(matrix(1,nrow(X),1) %*% stds)
+
+W1 = sqrt(6)/sqrt(m + num_hidden1) * W1_rand
+b1 = matrix(0, num_hidden1, 1)
+
+W2 = sqrt(6)/sqrt(num_hidden1 + num_hidden2) * W2_rand
+b2 = matrix(0, num_hidden2, 1)
+
+W3 = sqrt(6)/sqrt(num_hidden2 + num_hidden1) * W3_rand
+b3 = matrix(0, num_hidden1, 1)
+
+W4 = sqrt(6)/sqrt(num_hidden1 + m) * W4_rand  #fan-in num_hidden1, fan-out m
+b4 = matrix(0, m, 1)
+
+upd_W1 = matrix(0, nrow(W1), ncol(W1))
+upd_b1 = matrix(0, nrow(b1), ncol(b1))
+upd_W2 = matrix(0, nrow(W2), ncol(W2))
+upd_b2 = matrix(0, nrow(b2), ncol(b2))
+upd_W3 = matrix(0, nrow(W3), ncol(W3))
+upd_b3 = matrix(0, nrow(b3), ncol(b3))
+upd_W4 = matrix(0, nrow(W4), ncol(W4))
+upd_b4 = matrix(0, nrow(b4), ncol(b4))
+
+iter = 0
+num_iters_per_epoch = ceiling(n / batch_size)
+max_iterations = max_epochs * num_iters_per_epoch
+# debug
+# print("num_iters_per_epoch=" + num_iters_per_epoch + " max_iterations=" + max_iterations)
+beg = 1
+while( iter < max_iterations ) {
+ end = beg + batch_size - 1
+ if(end > n) end = n
+ X_batch = X[beg:end,]
+
+ # Notation:
+ # 1 2 3 4 5 6 7 8 9
+ # [H1, H1_prime, H2, H2_prime, H3, H3_prime, Yhat, Yhat_prime, E]
+ # tmp_ff = feedForward(X_batch, W1, b1, W2, b2, W3, b3, W4, b4, X_batch)
+ # H1 = tmp_ff[1]; H1_prime = tmp_ff[2]; H2 = tmp_ff[3]; H2_prime = tmp_ff[4];
+ # H3 = tmp_ff[5]; H3_prime = tmp_ff[6]; Yhat = tmp_ff[7]; Yhat_prime = tmp_ff[8];
+ # E = tmp_ff[9]
+ # inputs: X, W1, b1, W2, b2, W3, b3, W4, b4, X_batch
+ H1_in = t(W1 %*% t(X_batch) + b1 %*% matrix(1,ncol(b1),nrow(X_batch)))
+ H1 = func(H1_in)
+ H1_prime = func1(H1_in)
+
+ H2_in = t(W2 %*% t(H1) + b2 %*% matrix(1,ncol(b2),nrow(H1)))
+ H2 = func(H2_in)
+ H2_prime = func1(H2_in)
+
+ H3_in = t(W3 %*% t(H2) + b3 %*% matrix(1,ncol(b3),nrow(H2)))
+ H3 = func(H3_in)
+ H3_prime = func1(H3_in)
+
+ Yhat_in = t(W4 %*% t(H3) + b4 %*% matrix(1,ncol(b4),nrow(H3)))
+ Yhat = func(Yhat_in)
+ Yhat_prime = func1(Yhat_in)
+
+ E = Yhat - X_batch
+
+ # Notation:
+ # 1 2 3 4 5 6 7 8
+ # [W1_grad, b1_grad, W2_grad, b2_grad, W3_grad, b3_grad,W4_grad, b4_grad]
+ # tmp_grad = grad(X_batch, H1, H1_prime, H2, H2_prime, H3, H3_prime, Yhat_prime, E, W1, W2, W3, W4)
+ # W1_grad = tmp_grad[1]; b1_grad = tmp_grad[2]; W2_grad = tmp_grad[3]; b2_grad = tmp_grad[4];
+ # W3_grad = tmp_grad[5]; b3_grad = tmp_grad[6]; W4_grad = tmp_grad[7]; b4_grad = tmp_grad[8];
+ # grad function
+
+ #backprop
+ delta4 = E * Yhat_prime
+ delta3 = H3_prime * (delta4 %*% W4)
+ delta2 = H2_prime * (delta3 %*% W3)
+ delta1 = H1_prime * (delta2 %*% W2)
+
+ #compute gradients
+  b4_grad = as.matrix(colSums(delta4))
+  b3_grad = as.matrix(colSums(delta3))
+  b2_grad = as.matrix(colSums(delta2))
+  b1_grad = as.matrix(colSums(delta1))
+
+ W4_grad = t(delta4) %*% H3
+ W3_grad = t(delta3) %*% H2
+ W2_grad = t(delta2) %*% H1
+ W1_grad = t(delta1) %*% X_batch
+
+ ob = obj(E)
+ epochs = iter / num_iters_per_epoch
+ # debug
+ # print(table(epochs, ob), zero.print = "0")
+
+ #update
+ local_step = step / nrow(X_batch)
+  upd_W1 = mu * upd_W1 - local_step * W1_grad
+  upd_b1 = mu * upd_b1 - local_step * b1_grad
+  upd_W2 = mu * upd_W2 - local_step * W2_grad
+  upd_b2 = mu * upd_b2 - local_step * b2_grad
+  upd_W3 = mu * upd_W3 - local_step * W3_grad
+  upd_b3 = mu * upd_b3 - local_step * b3_grad
+  upd_W4 = mu * upd_W4 - local_step * W4_grad
+  upd_b4 = mu * upd_b4 - local_step * b4_grad
+ W1 = W1 + upd_W1
+ b1 = b1 + upd_b1
+ W2 = W2 + upd_W2
+ b2 = b2 + upd_b2
+ W3 = W3 + upd_W3
+ b3 = b3 + upd_b3
+ W4 = W4 + upd_W4
+ b4 = b4 + upd_b4
+
+ iter = iter + 1
+ if(end == n) beg = 1
+ else beg = end + 1
+
+ if(iter %% num_iters_per_epoch == 0)
+ step = step * decay
+
+ if(full_obj & iter %% num_iters_per_epoch == 0 ) {
+ # Notation:
+ # tmp_ff = feedForward(X, W1, b1, W2, b2, W3, b3, W4, b4, X)
+ # full_H1 = tmp_ff[1]; full_H1_prime = tmp_ff[2]; full_H2 = tmp_ff[3]; full_H2_prime = tmp_ff[4];
+ # full_H3 = tmp_ff[5]; full_H3_prime = tmp_ff[6]; full_Yhat = tmp_ff[7]; full_Yhat_prime = tmp_ff[8];
+ # full_E = tmp_ff[9];
+ # inputs: X, W1, b1, W2, b2, W3, b3, W4, b4, X
+ H1_in = t(W1 %*% t(X) + b1 %*% matrix(1,ncol(b1),nrow(X)))
+ full_H1 = func(H1_in)
+ full_H1_prime = func1(H1_in)
+
+    H2_in = t(W2 %*% t(full_H1) + b2 %*% matrix(1,ncol(b2),nrow(full_H1)))
+    full_H2 = func(H2_in)
+    full_H2_prime = func1(H2_in)
+
+    H3_in = t(W3 %*% t(full_H2) + b3 %*% matrix(1,ncol(b3),nrow(full_H2)))
+    full_H3 = func(H3_in)
+    full_H3_prime = func1(H3_in)
+
+    Yhat_in = t(W4 %*% t(full_H3) + b4 %*% matrix(1,ncol(b4),nrow(full_H3)))
+    full_Yhat = func(Yhat_in)
+    full_Yhat_prime = func1(Yhat_in)
+    full_E = full_Yhat - X
+
+ full_o = obj(full_E)
+ epochs = iter %/% num_iters_per_epoch
+ # debug
+ # print(table(epochs, full_o, deparse.level=2), zero.print=".")
+ # print("EPOCHS=" + epochs + " iter=" + iter + " OBJ (FULL DATA)=" + full_o)
+ }
+}
+
+#debug
+#print.table(W1, digits=3)
+writeMM(as(W1,"CsparseMatrix"), paste(args[10], "W1_out", sep=""));
+writeMM(as(b1,"CsparseMatrix"), paste(args[10], "b1_out", sep=""));
+writeMM(as(W2,"CsparseMatrix"), paste(args[10], "W2_out", sep=""));
+writeMM(as(b2,"CsparseMatrix"), paste(args[10], "b2_out", sep=""));
+writeMM(as(W3,"CsparseMatrix"), paste(args[10], "W3_out", sep=""));
+writeMM(as(b3,"CsparseMatrix"), paste(args[10], "b3_out", sep=""));
+writeMM(as(W4,"CsparseMatrix"), paste(args[10], "W4_out", sep=""));
+writeMM(as(b4,"CsparseMatrix"), paste(args[10], "b4_out", sep=""));
diff --git a/src/test/scripts/functions/builtin/autoencoder_2layer.dml b/src/test/scripts/functions/builtin/autoencoder_2layer.dml
new file mode 100644
index 0000000..05574cb
--- /dev/null
+++ b/src/test/scripts/functions/builtin/autoencoder_2layer.dml
@@ -0,0 +1,35 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+[W1, b1, W2, b2, W3, b3, W4, b4, hidden] = autoencoder_2layer(X = read($X), W1_rand = read($W1_rand), W2_rand = read($W2_rand),
+ W3_rand = read($W3_rand), W4_rand = read($W4_rand), order_rand = read($order_rand),
+ num_hidden1 = $H1, num_hidden2 = $H2, max_epochs = $EPOCH, full_obj = $OBJ,
+ batch_size = $BATCH, step = $STEP, decay = $DECAY, mu = $MOMENTUM)
+
+write(W1, $W1_out);
+write(b1, $b1_out);
+write(W2, $W2_out);
+write(b2, $b2_out);
+write(W3, $W3_out);
+write(b3, $b3_out);
+write(W4, $W4_out);
+write(b4, $b4_out);
+write(hidden, $hidden_out);
\ No newline at end of file