#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------

/*
 * Factorization Machines for binary classification.
 */
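
# Model overview (a sketch of the standard second-order factorization
# machine, which the fm layer below is assumed to implement):
#
#   y_hat(x) = w0 + sum_{i=1}^{d} W[i]*x[i]
#              + sum_{i=1}^{d} sum_{j=i+1}^{d} dot(V[i,], V[j,]) * x[i]*x[j]
#
# w0 is the global bias, W holds the (d, 1) linear weights, and V is the
# (d, k) matrix of pairwise-interaction factors. For binary classification,
# the score is passed through a sigmoid to yield P(y=1 | x).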

# Imports
source("nn/optim/adam.dml") as adam
source("nn/layers/fm.dml") as fm
source("nn/layers/log_loss.dml") as log_loss
source("nn/layers/sigmoid.dml") as sigmoid
source("nn/layers/l2_reg.dml") as l2_reg
source("nn/layers/cross_entropy_loss.dml") as cross_entropy_loss

train = function(matrix[double] X, matrix[double] y, matrix[double] X_val, matrix[double] y_val)
    return (matrix[double] w0, matrix[double] W, matrix[double] V, double loss) {
  /*
   * Trains the FM model for binary classification.
   *
   * Inputs:
   *  - X     : n training examples with d features, of shape (n, d).
   *  - y     : binary labels in {0, 1} for the n examples, of shape (n, 1).
   *  - X_val : validation examples, of shape (n_val, d).
   *  - y_val : validation labels, of shape (n_val, 1).
   *
   * Outputs:
   *  - w0, W, V : trained model parameters.
   *  - loss : training log loss of the final mini-batch.
   *
   * Data flow through the layers:
   *   forward:  fm::init -> adam::init -> fm::forward -> sigmoid::forward -> log_loss::forward
   *   backward: adam::update <- fm::backward <- sigmoid::backward <- log_loss::backward
   */

  n = nrow(X);
  d = ncol(X);
  k = 2;  # factorization dimensionality; only k=2 is supported for now.

  # 1.Initialize the FM parameters.
  [w0, W, V] = fm::init(d, k);

  # 2.Initialize the Adam optimizer state.
  ## Default hyperparameter values
  lr = 0.001;
  beta1 = 0.9;  # [0, 1)
  beta2 = 0.999;  # [0, 1)
  epsilon = 0.00000001;
  t = 0;  # 0-based timestep, advanced once per mini-batch update.

  # [mX, vX] = adam::init(X);  # uncomment to also optimize the inputs X.
  [mw0, vw0] = adam::init(w0);
  [mW, vW] = adam::init(W);
  [mV, vV] = adam::init(V);

  # Regularization
  lambda = 5e-04;  # L2 regularization strength

  # Optimize
  print("Starting optimization")
  batch_size = 10
  iters = ceil(1000 / batch_size)  # 100 mini-batch updates per epoch
  epochs = 100; N = n;
  for (e in 1:epochs) {
    for (i in 1:iters) {
      # Get the next batch (indices wrap around modulo N)
      beg = ((i-1) * batch_size) %% N + 1
      end = min(N, beg + batch_size - 1)
      X_batch = X[beg:end,]
      y_batch = y[beg:end,]

      # 3.Send inputs through fm::forward
      y_res = fm::forward(X_batch, w0, W, V);

      # 4.Send the above result through sigmoid::forward
      sfy = sigmoid::forward(y_res);

      # 5.Send the above result through log_loss::forward
      loss = log_loss::forward(sfy, y_batch);

      # Compute loss & accuracy for training & validation data every 100
      # iterations (i.e., once per epoch here).
      if (i %% 100 == 0) {
        # Compute training loss & accuracy
        loss_data = loss;  # training log loss, already computed above
        loss_reg_w0 = l2_reg::forward(w0, lambda);
        loss_reg_W = l2_reg::forward(W, lambda);
        loss_reg_V = l2_reg::forward(V, lambda);

        accuracy = mean((sfy<0.5) == (y_batch<0.5));
        loss = loss_data + loss_reg_w0 + loss_reg_W + loss_reg_V;

        # Compute validation loss & accuracy
        probs_val = predict(X_val, w0, W, V)
        loss_val = log_loss::forward(probs_val, y_val)
        accuracy_val = mean((probs_val<0.5) == (y_val<0.5))

        # Output results
        print("Epoch: " + e + ", Iter: " + i + ", Train Loss: " + loss + ", Train Accuracy: "
              + accuracy + ", Val Loss: " + loss_val + ", Val Accuracy: " + accuracy_val)
      }

      # 6.Send the result of sigmoid::forward and the correct labels y to log_loss::backward
      dsfy = log_loss::backward(sfy, y_batch);

      # 7.Send the above result through sigmoid::backward
      dy = sigmoid::backward(dsfy, y_res);

      # 8.Send the above result through fm::backward
      [dw0, dW, dV] = fm::backward(dy, X_batch, w0, W, V);

      # 9.Update the timestep (0-based count of completed updates).
      t = (e-1) * iters + i - 1;

      # 10.Call adam::update for all parameters

      # In case we also want to optimize the inputs (X), as in deep dream.
      #[X, mX, vX] = adam::update(X, dX, lr, beta1, beta2, epsilon, t, mX, vX);

      [w0, mw0, vw0] = adam::update(w0, dw0, lr, beta1, beta2, epsilon, t, mw0, vw0);
      [W, mW, vW] = adam::update(W, dW, lr, beta1, beta2, epsilon, t, mW, vW);
      [V, mV, vV] = adam::update(V, dV, lr, beta1, beta2, epsilon, t, mV, vV);
    }
  }
}
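
# Example call (a minimal sketch; X_train, y_train, X_val, y_val are
# hypothetical matrices the caller has prepared, with labels in {0, 1}):
#
#   [w0, W, V, loss] = train(X_train, y_train, X_val, y_val)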

predict = function(matrix[double] X, matrix[double] w0, matrix[double] W, matrix[double] V)
    return (matrix[double] out) {
  /*
   * Computes class-1 probabilities for the given inputs.
   *
   * Inputs:
   *  - X : n examples with d features, of shape (n, d).
   *  - w0, W, V : trained model parameters.
   *
   * Outputs:
   *  - out : predicted probabilities of class 1, of shape (n, 1).
   */

  # 1.Send inputs through fm::forward
  y = fm::forward(X, w0, W, V);

  # 2.Send the above result through sigmoid::forward to obtain probabilities
  out = sigmoid::forward(y);
}
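
# To turn the probabilities returned by predict into hard 0/1 labels, a
# caller can threshold at 0.5 (a sketch; probs is assumed to be the output
# of predict above):
#
#   y_pred = probs >= 0.5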

eval = function(matrix[double] probs, matrix[double] y)
    return (double loss, double accuracy) {
  /*
   * Computes the log loss and accuracy of the given predictions.
   *
   * Inputs:
   *  - probs : predicted probabilities of class 1, of shape (n, 1).
   *  - y : binary labels in {0, 1}, of shape (n, 1).
   *
   * Outputs:
   *  - loss : mean log loss over the n examples.
   *  - accuracy : fraction of examples classified correctly at a 0.5 threshold.
   */

  # 1. compute log loss
  loss = log_loss::forward(probs, y);

  # 2. compute accuracy at a 0.5 decision threshold
  accuracy = mean((probs<0.5) == (y<0.5));
}
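
# End-to-end usage (a sketch, not executed here; the file name
# "fm-binclass.dml", the alias fmbc, the CSV paths, and the 80/20 split
# are illustrative assumptions):
#
#   source("fm-binclass.dml") as fmbc
#   X = read("X.csv", format="csv")  # features, shape (n, d)
#   y = read("y.csv", format="csv")  # labels in {0, 1}, shape (n, 1)
#   n = nrow(X); n_tr = floor(0.8 * n)
#   [w0, W, V, loss] = fmbc::train(X[1:n_tr,], y[1:n_tr,],
#                                  X[(n_tr+1):n,], y[(n_tr+1):n,])
#   probs = fmbc::predict(X[(n_tr+1):n,], w0, W, V)
#   [val_loss, val_acc] = fmbc::eval(probs, y[(n_tr+1):n,])
#   print("Val loss: " + val_loss + ", val accuracy: " + val_acc)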