| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
| # Scale and center the individual features (columns) of the input matrix, using the z-score to scale the values. |
| # --------------------------------------------------------------------------------------------- |
| # NAME TYPE DEFAULT MEANING |
| # --------------------------------------------------------------------------------------------- |
| # X Matrix --- Input feature matrix |
| # center Boolean TRUE Indicates whether or not to center the feature matrix |
| # scale Boolean TRUE Indicates whether or not to scale the feature matrix |
| # --------------------------------------------------------------------------------------------- |
| # Y Matrix --- Output feature matrix with the same dimensions as X |
| # ColMean Matrix --- The column means of the input, subtracted if center was TRUE (an empty matrix otherwise) |
| # ScaleFactor Matrix --- The scaling of the values, to make each dimension have similar value ranges (an empty matrix if scale was FALSE) |
| # --------------------------------------------------------------------------------------------- |
| |
# Centers and/or scales the columns of X.
#   X      : input feature matrix, one feature per column
#   center : subtract the column means from X (defaults to TRUE)
#   scale  : divide each column by sqrt(colSums(X^2)/(nrow(X)-1)), evaluated
#            after the optional centering step (defaults to TRUE)
# Returns the transformed matrix Y together with the ColMean and ScaleFactor
# row vectors that were applied; a vector whose step was skipped is returned
# as an empty 0x0 matrix.
m_scale = function(Matrix[Double] X, Boolean center=TRUE, Boolean scale=TRUE)
  return (Matrix[Double] Y, Matrix[Double] ColMean, Matrix[Double] ScaleFactor)
{
  # Allocate both auxiliary outputs as empty matrices up front, so that
  # something is returned for them even when a step is skipped.
  ColMean = matrix(0, rows=0, cols=0)
  ScaleFactor = matrix(0, rows=0, cols=0)

  if (center) {
    ColMean = colMeans(X)

    # A column containing NaN has a NaN mean; subtracting it would turn every
    # entry of that column into NaN. Use 0 instead and leave it uncentered.
    ColMean = replace(target=ColMean, pattern=NaN, replacement=0)

    X = X - ColMean
  }

  if (scale) {
    N = nrow(X)

    ScaleFactor = sqrt(colSums(X^2) / (N - 1))

    # Replace entries in the scale factor that are NaN or 0 with 1, to avoid
    # dividing by NaN or 0 and thereby introducing NaN into the output.
    # NaN arises for columns containing NaN and for 0/0 when a single-row
    # input has been centered; 0 arises for constant (centered) columns.
    ScaleFactor = replace(target=ScaleFactor, pattern=NaN, replacement=1)
    ScaleFactor = replace(target=ScaleFactor, pattern=0, replacement=1)

    X = X / ScaleFactor
  }

  Y = X
}