#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# This builtin function computes and prints a summary of accuracy
# measures for regression problems.
#
# INPUT:
# ------------------------------------------------------------
# yhat   Column vector of predicted response values
# ytest  Column vector of actual (ground-truth) response values
# lm     Boolean indicator whether yhat comes from a linear regression
#        model (selects the classical R^2 formula)
# ------------------------------------------------------------
#
# OUTPUT:
# ------------------------------------------------------------
# R      Column vector holding avg_res, ss_avg_res, and R2
# ------------------------------------------------------------
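#
# Minimal usage sketch (illustrative only; assumes the companion lm and
# lmPredict builtins with their usual X/y and X/B/ytest parameters):
#
#   X = rand(rows=100, cols=5, seed=42)
#   y = X %*% rand(rows=5, cols=1, seed=43)
#   B = lm(X=X, y=y)
#   yhat = lmPredict(X=X, B=B, ytest=y)
#   R = lmPredictStats(yhat=yhat, ytest=y, lm=TRUE)
#   print(toString(R))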
m_lmPredictStats = function(Matrix[Double] yhat, Matrix[Double] ytest, Boolean lm)
return (Matrix[Double] R)
{
print ("\n\nComputing the statistics...");
n = nrow(ytest)
sum_y_test = sum(ytest)
mean_y_test = sum_y_test / n
sum_sq_y_test = sum(ytest^2)
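# Residuals of the predictions: avg_res is the residual bias (mean residual),
# ss_res the residual sum of squares, and ss_avg_res the sum of squared
# residuals around their mean (bias-corrected)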
y_residual = ytest - yhat;
avg_res = sum(y_residual) / n;
ss_res = sum(y_residual^2);
ss_avg_res = ss_res - n * avg_res^2;
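# Two R^2 variants: for a linear regression model use the classical
# coefficient of determination 1 - SS_res/SS_tot; otherwise report the
# ratio of explained to total variation around the mean of ytest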
if( lm )
R2 = 1 - ss_res / (sum_sq_y_test - n * (sum_y_test/n)^2);
else
R2 = sum((yhat - mean_y_test)^2) / sum((ytest - mean_y_test)^2)
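# Total variation of the test response: centered total sum of squares,
# its sample variance, and the R^2 obtained after removing the residual
# bias from the residual sum of squares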
avg_tot = sum_y_test / n;
ss_tot = sum_sq_y_test;
ss_avg_tot = ss_tot - n * avg_tot ^ 2;
var_tot = ss_avg_tot / (n - 1);
R2_nobias = 1 - ss_avg_res / ss_avg_tot;
print("sum(ytest) = " + sum_y_test)
print("sum(yhat) = " + sum(yhat))
print("SS_AVG_RES_Y: " + ss_avg_res)
# Average of the response value Y
print("AVG_TOT_Y, " + avg_tot)
# Standard Deviation of the response value Y
print("STDEV_TOT_Y, " + sqrt(var_tot))
# Average of the residual Y - pred(Y|X), i.e. residual bias
print("AVG_RES_Y, " + avg_res)
# R^2 of residual with bias included vs. total average
print("R2, " + R2)
# R^2 of residual with bias subtracted vs. total average
print("R2_NOBIAS, " + R2_nobias)
# Pack the reported statistics into the result vector [avg_res, ss_avg_res, R2]
R = as.matrix(list(avg_res, ss_avg_res, R2));
}