| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
| # How to invoke this DML script (LinearRegression.dml): |
| # Assume LR_HOME is set to the directory that contains the DML script. |
| # Assume the input and output directories are on HDFS as INPUT_DIR and OUTPUT_DIR. |
| # Example: V has rows = 50 and cols = 30, and eps = 0.00000001. |
| # Positional arguments: $1 = path of V, $2 = path of y, $3 = eps, $4 = output path for w. |
| # hadoop jar SystemDS.jar -f $LR_HOME/LinearRegression.dml -args "$INPUT_DIR/v" "$INPUT_DIR/y" 0.00000001 "$OUTPUT_DIR/w" |
| |
| # Approximates the solution w of the regularized normal equations |
| #   (t(V) %*% V + eps * I) %*% w = t(V) %*% y |
| # with a bounded number of conjugate-gradient (CG) steps, starting from w = 0. |
| # |
| # Positional arguments: |
| #   $1  path of the matrix V |
| #   $2  path of the vector y |
| #   $3  eps, the scalar multiplying p in the q update below |
| #   $4  path to which w is written in text format |
| |
| V = read($1); |
| y = read($2); |
| |
| eps = $3; |
| |
| # With w = 0 the residual of the system above is r = -(t(V) %*% y); |
| # the first search direction p is the negative residual. |
| r = -(t(V) %*% y); |
| p = -r; |
| norm_r2 = sum(r * r); |
| w = Rand(rows = ncol(V), cols = 1, min = 0, max = 0); |
| |
| max_iteration = 3; |
| i = 0; |
| # Stop early once the squared residual norm is exactly zero: in that case p is |
| # also zero, so alpha (and later beta) would be 0/0 = NaN and overwrite an |
| # already exact w. The test is "!= 0" rather than "> 0" so that a NaN residual |
| # caused by NaN input still propagates into w instead of being silently hidden. |
| while(i < max_iteration & norm_r2 != 0) { |
|     # q = (t(V) %*% V + eps * I) %*% p, computed without forming t(V) %*% V. |
|     q = ((t(V) %*% (V %*% p)) + eps * p); |
|     # Step length along p. |
|     alpha = norm_r2 / as.scalar(t(p) %*% q); |
|     w = w + alpha * p; |
|     old_norm_r2 = norm_r2; |
|     # Update the residual incrementally instead of recomputing it from w. |
|     r = r + alpha * q; |
|     norm_r2 = sum(r * r); |
|     # Ratio of new to old squared residual norm, used to build the next direction. |
|     beta = norm_r2 / old_norm_r2; |
|     p = -r + beta * p; |
|     i = i + 1; |
| } |
| |
| write(w, $4, format="text"); |