scripts/builtin/scale.dml - systemds - Git at Google

 #-------------------------------------------------------------
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #
 #-------------------------------------------------------------

 # This function scales and centers the individual features (columns) of the
 # input matrix, using the z-score to scale the values.
 # The transformation is sometimes also called scale and shift,
 # although the values are shifted first and only then scaled.
 #
 # The method is not resistant to inputs containing NaN or to overflows
 # of doubles, but it handles them by guaranteeing that no extra NaN values
 # are introduced; columns that contain NaN are neither scaled nor shifted.
 #
 # INPUT:
 # --------------------------------------------------------------------------------------
 # X       Input feature matrix
 # center  Indicates to center the feature matrix
 # scale   Indicates to scale the feature matrix according to z-score
 # --------------------------------------------------------------------------------------
 #
 # OUTPUT:
 # -------------------------------------------------------------------------------------------
 # Out          Output feature matrix scaled and shifted
 # Centering    The column means of the input, subtracted if Center was TRUE
 # ScaleFactor  The scaling of the values, to make each dimension have similar value ranges
 # -------------------------------------------------------------------------------------------

 m_scale = function(Matrix[Double] X, Boolean center=TRUE, Boolean scale=TRUE)
   return (Matrix[Double] Out, Matrix[Double] Centering, Matrix[Double] ScaleFactor)
 {
   # Column-wise normalization of X: optionally subtract the column means
   # (center), then optionally divide each column by the square root of its
   # sum of squares over (n - 1) (scale).
   #
   # Centering and ScaleFactor start out as empty 0x0 matrices so that all
   # three outputs are defined even when the corresponding step is disabled.
   Centering = matrix(0, rows=0, cols=0)
   ScaleFactor = matrix(0, rows=0, cols=0)

   if (center) {
     Centering = colMeans(X)
     # A NaN mean is replaced by 0 so that the affected column is left
     # unshifted instead of having NaN spread to all of its entries.
     Centering = replace(target=Centering, pattern=NaN, replacement=0);
     X = X - Centering
   }

   if (scale) {
     # Clamp the denominator to at least 1. For a single-row input (n - 1)
     # would be 0; without centering that yields an infinite scale factor,
     # which would collapse every value of the output to 0.
     denom = max(nrow(X) - 1, 1)
     ScaleFactor = sqrt(colSums(X^2) / denom)

     # Scale factors that are NaN or 0 are replaced by 1, so the division
     # below cannot introduce new NaN values into the output.
     ScaleFactor = replace(target=ScaleFactor, pattern=NaN, replacement=1);
     ScaleFactor = replace(target=ScaleFactor, pattern=0, replacement=1);
     X = X / ScaleFactor
   }

   # Hand the (possibly shifted and scaled) matrix back as the result.
   Out = X
 }
	#-------------------------------------------------------------
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	#
	#-------------------------------------------------------------

	# This function scales and centers the individual features (columns) of the
	# input matrix, using the z-score to scale the values.
	# The transformation is sometimes also called scale and shift,
	# although the values are shifted first and only then scaled.
	#
	# The method is not resistant to inputs containing NaN or to overflows
	# of doubles, but it handles them by guaranteeing that no extra NaN values
	# are introduced; columns that contain NaN are neither scaled nor shifted.
	#
	# INPUT:
	# --------------------------------------------------------------------------------------
	# X Input feature matrix
	# center Indicates to center the feature matrix
	# scale Indicates to scale the feature matrix according to z-score
	# --------------------------------------------------------------------------------------
	#
	# OUTPUT:
	# -------------------------------------------------------------------------------------------
	# Out Output feature matrix scaled and shifted
	# Centering The column means of the input, subtracted if Center was TRUE
	# ScaleFactor The scaling of the values, to make each dimension have similar value ranges
	# -------------------------------------------------------------------------------------------

	m_scale = function(Matrix[Double] X, Boolean center=TRUE, Boolean scale=TRUE)
	  return (Matrix[Double] Out, Matrix[Double] Centering, Matrix[Double] ScaleFactor)
	{
	  # Shift-then-scale normalization applied to every column of X.
	  # Both auxiliary outputs are pre-set to empty 0x0 matrices, so they are
	  # always bound on return, even if centering or scaling is switched off.
	  Centering = matrix(0, rows=0, cols=0)
	  ScaleFactor = matrix(0, rows=0, cols=0)

	  if (center) {
	    Centering = colMeans(X)
	    # Columns whose mean is NaN get a shift of 0, i.e. they stay as they
	    # are rather than being turned into NaN entirely.
	    Centering = replace(target=Centering, pattern=NaN, replacement=0);
	    X = X - Centering
	  }

	  if (scale) {
	    # Use at least 1 as the denominator: with one row, (n - 1) is 0 and an
	    # uncentered input would get an infinite scale factor, wiping the
	    # output to 0.
	    denom = max(nrow(X) - 1, 1)
	    ScaleFactor = sqrt(colSums(X^2) / denom)

	    # Neutralize NaN and 0 scale factors (set them to 1) so the division
	    # below does not add NaN values to the output.
	    ScaleFactor = replace(target=ScaleFactor, pattern=NaN, replacement=1);
	    ScaleFactor = replace(target=ScaleFactor, pattern=0, replacement=1);
	    X = X / ScaleFactor
	  }

	  # The transformed matrix is the primary return value.
	  Out = X
	}