| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
| /** |
| * Factorization Machines for Regression. |
| */ |
| |
| # Imports |
| source("nn/optim/adam.dml") as adam |
| source("nn/layers/fm.dml") as fm |
| source("nn/layers/l2_loss.dml") as l2_loss |
| source("nn/layers/l2_reg.dml") as l2_reg |
| |
| train = function(matrix[double] X, matrix[double] y, matrix[double] X_val, matrix[double] y_val) |
| return (matrix[double] w0, matrix[double] W, matrix[double] V) { |
| /* |
| * Trains the FM regression model with mini-batch Adam updates. |
| * |
| * Inputs: |
| * - X : n examples with d features, of shape (n, d) |
| * - y : Target matrix, of shape (n, 1) |
| * - X_val : Input validation data matrix with the same d features; |
| * its row count does not have to equal n. |
| * - y_val : Target validation matrix, one row per row of X_val. |
| * |
| * Outputs: |
| * - w0, W, V : updated model parameters. |
| * |
| * Network Architecture: |
| * |
| * X --> [model] --> out --> l2_loss::backward(out, y) --> dout |
| * |
| * Notes: |
| * - The "accuracy" values printed below come from eval() in this file, |
| * which returns the root mean squared error, so lower is better. |
| * - Progress is printed only when the batch index i is a multiple of |
| * 100, i.e. never if an epoch has fewer than 100 batches. |
| */ |
| |
| n = nrow(X) # num examples |
| d = ncol(X) # num features |
| k = 2 # factorization dimensionality, |
| # only (=2) possible |
| |
| # 1.initialize fm core |
| [w0, W, V] = fm::init(d, k); |
| |
| # 2.initialize adam optimizer |
| ## Default values for some parameters |
| lr = 0.001; |
| beta1 = 0.9; # [0, 1) |
| beta2 = 0.999; # [0, 1) |
| epsilon = 0.00000001; |
| # Number of parameter updates performed so far. It must grow by exactly |
| # one per update across ALL epochs; it is advanced at the end of the loop. |
| t = 0; |
| |
| [mw0, vw0] = adam::init(w0); |
| [mW, vW] = adam::init(W); |
| [mV, vV] = adam::init(V); |
| |
| # regularization |
| # NOTE(review): lambda only enters the reported training loss below; the |
| # gradients handed to adam::update carry no regularization term. Confirm |
| # whether l2_reg::backward contributions were meant to be added. |
| lambda = 5e-04 |
| |
| # Optimize |
| print("Starting optimization") |
| batch_size = 10 |
| epochs = 100; N = n; |
| iters = ceil(N / batch_size) |
| |
| for (e in 1:epochs) { |
| for (i in 1:iters) { |
| # Get the next batch (the last batch of an epoch may be shorter) |
| beg = ((i-1) * batch_size) %% N + 1 |
| end = min(N, beg + batch_size - 1) |
| X_batch = X[beg:end,] |
| y_batch = y[beg:end,] |
| |
| # 3.Send inputs through fm::forward |
| out = fm::forward(X_batch, w0, W, V); |
| |
| # 4.compute gradients from a loss l2_loss::backward |
| dout = l2_loss::backward(out, y_batch)# (predictions, targets) |
| |
| # Compute loss & error for training & validation data every 100 iterations. |
| if (i %% 100 == 0) { |
| # Training loss on the current batch plus the L2 penalty of all parameters |
| [loss_data, accuracy] = eval(out, y_batch); |
| loss_reg_w0 = l2_reg::forward(w0, lambda) |
| loss_reg_W = l2_reg::forward(W , lambda) |
| loss_reg_V = l2_reg::forward(V , lambda) |
| loss = loss_data + loss_reg_w0 + loss_reg_W + loss_reg_V |
| |
| # Validation loss on the full validation set (no penalty terms added) |
| probs_val = predict(X_val, w0, W, V) |
| [loss_val, accuracy_val] = eval(probs_val, y_val); |
| |
| # Output results |
| print("Epoch: " + e + ", Iter: " + i + ", Train Loss: " + loss + ", Train Accuracy: " |
| + accuracy + ", Val Loss: " + loss_val + ", Val Accuracy: " + accuracy_val) |
| } |
| |
| # 5.Send the above result through fm::backward |
| [dw0, dW, dV] = fm::backward(dout, X_batch, w0, W, V); |
| |
| # 6.Call adam::update for all parameters, all with the same timestep |
| [w0,mw0,vw0] = adam::update(w0, dw0, lr, beta1, beta2, epsilon, t, mw0, vw0); |
| [W, mW, vW] = adam::update(W, dW, lr, beta1, beta2, epsilon, t, mW, vW ); |
| [V, mV, vV] = adam::update(V, dV, lr, beta1, beta2, epsilon, t, mV, vV ); |
| |
| # 7.advance the timestep: 0, 1, 2, ... over the whole run. |
| # The former `e * i - 1` restarted and skipped values every epoch, |
| # which corrupts Adam's bias correction. |
| t = t + 1; |
| |
| } |
| } |
| } |
| |
| predict = function(matrix[double] X, matrix[double] w0, matrix[double] W, matrix[double] V) |
| return (matrix[double] out) { |
| /* |
| * Runs the factorization machine forward pass on X and returns the result. |
| * |
| * Inputs: |
| * - X : input examples, one per row, of shape (n, d). |
| * - w0, W, V : model parameters, e.g. as returned by train(). |
| * |
| * Outputs: |
| * - out : whatever fm::forward produces for X; presumably one |
| * predicted target per example -- confirm against nn/layers/fm.dml. |
| */ |
| |
| # Delegate entirely to the fm layer; no post-processing is applied. |
| fm_out = fm::forward(X, w0, W, V); |
| out = fm_out; |
| |
| } |
| |
| eval = function(matrix[double] probs, matrix[double] y) |
| return (double loss, double accuracy) { |
| /* |
| * Scores predictions against targets. |
| * |
| * Inputs: |
| * - probs : predicted values. |
| * - y : target values, same shape as probs. |
| * |
| * Outputs: |
| * - loss : value of l2_loss::forward(probs, y). |
| * - accuracy : root mean squared error between probs and y |
| * (an error measure, so smaller is better). |
| */ |
| |
| # L2 loss as defined by the l2_loss layer |
| loss = l2_loss::forward(probs, y); |
| |
| # root of the mean squared residual |
| residual = probs - y |
| accuracy = mean(residual^2)^0.5 |
| |
| } |
| |