| #------------------------------------------------------------- |
| # |
| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| # |
| #------------------------------------------------------------- |
| |
| # This function scales and centers individual features in the input |
| # matrix (column-wise), using the z-score to scale the values. |
| # The transformation is sometimes also called scale and shift, |
| # but the data is shifted first and then subsequently scaled. |
| # |
| # The method is not resistant to inputs containing NaN or to overflows |
| # of doubles, but handles them by guaranteeing that no extra NaN values |
| # are introduced; columns that contain NaN will not be scaled or shifted. |
| # |
| # INPUT: |
| # -------------------------------------------------------------------------------------- |
| # X Input feature matrix |
| # center Indicates to center the feature matrix |
| # scale Indicates to scale the feature matrix according to z-score |
| # -------------------------------------------------------------------------------------- |
| # |
| # OUTPUT: |
| # ------------------------------------------------------------------------------------------- |
| # Out Output feature matrix scaled and shifted |
| # Centering The column means of the input, subtracted if center was TRUE |
| # ScaleFactor The scaling of the values, to make each dimension have similar value ranges |
| # ------------------------------------------------------------------------------------------- |
| |
m_scale = function(Matrix[Double] X, Boolean center=TRUE, Boolean scale=TRUE)
  return (Matrix[Double] Out, Matrix[Double] Centering, Matrix[Double] ScaleFactor)
{
  # Column-wise shift-then-scale of X.
  #
  #   X       input feature matrix
  #   center  if TRUE, subtract the column means from X
  #   scale   if TRUE, divide every column by sqrt(colSums(X^2) / (N - 1)),
  #           evaluated on X after the optional centering step
  #
  # Returns
  #   Out          the transformed matrix
  #   Centering    column means (NaN replaced by 0); empty if center is FALSE
  #   ScaleFactor  per-column divisor (NaN and 0 replaced by 1); empty if
  #                scale is FALSE

  # Allocate the Centering and ScaleFactor as empty matrices,
  # to return something on the function call even when a step is skipped.
  Centering = matrix(0, rows=0, cols=0)
  ScaleFactor = matrix(0, rows=0, cols=0)

  if (center) {
    Centering = colMeans(X)
    # Replace NaN means with 0 so that the subtraction does not
    # introduce additional NaN values.
    Centering = replace(target=Centering, pattern=NaN, replacement=0);
    X = X - Centering
  }

  if (scale) {
    N = nrow(X)
    # Guard the denominator: for a single-row input N - 1 is 0, which would
    # produce an infinite scale factor (and therefore an all-zero output)
    # whenever center is FALSE. Clamping to 1 leaves the result unchanged
    # for N >= 2.
    denom = max(N - 1, 1)
    ScaleFactor = sqrt(colSums(X^2) / denom)

    # Replace entries in the scale factor that are NaN or 0 with 1,
    # to avoid a division by NaN or 0 introducing NaN into the output.
    ScaleFactor = replace(target=ScaleFactor, pattern=NaN, replacement=1);
    ScaleFactor = replace(target=ScaleFactor, pattern=0, replacement=1);
    X = X / ScaleFactor
  }

  # Assign the transformed matrix to the returned value.
  Out = X
}