# scripts/builtin/naivebayes.dml - systemds - Git at Google

 #-------------------------------------------------------------
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
 # distributed with this work for additional information
 # regarding copyright ownership.  The ASF licenses this file
 # to you under the Apache License, Version 2.0 (the
 # "License"); you may not use this file except in compliance
 # with the License.  You may obtain a copy of the License at
 #
 #   http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing,
 # software distributed under the License is distributed on an
 # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 # KIND, either express or implied.  See the License for the
 # specific language governing permissions and limitations
 # under the License.
 #
 #-------------------------------------------------------------

 # Fits a naive Bayes model: per-class priors plus smoothed per-class
 # feature probabilities derived from grouped column sums of D.
 #
 # D        feature matrix, one row per training example
 # C        one class label per row of D; used directly as group ids, so
 #          labels are presumably integer codes in 1..max(C) - confirm with callers
 # laplace  additive smoothing constant applied to every class/feature count
 # verbose  if TRUE, prints the accuracy of the fitted model on D itself
 #
 # prior              fraction of rows falling into each class (one row per class)
 # classConditionals  smoothed feature probabilities (one row per class,
 #                    one column per feature)
 m_naivebayes = function(Matrix[Double] D, Matrix[Double] C, Double laplace = 1, Boolean verbose = TRUE)
   return (Matrix[Double] prior, Matrix[Double] classConditionals)
 {
   n = nrow(D);
   m = ncol(D);
   # The largest label value determines how many groups are aggregated
   k = as.integer(max(C));

   # Class priors: rows per class divided by the total number of rows
   rowsPerClass = aggregate(target=C, groups=C, fn="count", ngroups=k);
   prior = rowsPerClass / n;

   # Column sums of D within each class
   featureTotals = aggregate(target=D, groups=C, fn="sum", ngroups=k);

   # Per-class normalizer: total feature mass plus one smoothing
   # increment for every feature (Laplace's rule)
   smoothedTotals = rowSums(featureTotals) + m*laplace;

   # Smoothed class-conditional probabilities; each row sums to one
   classConditionals = (featureTotals + laplace) / smoothedTotals;

   # Optionally score the training data and report accuracy
   if( verbose ) {
     scores = D %*% t(log(classConditionals)) + t(log(prior));
     predicted = rowIndexMax(scores);
     acc = sum(predicted == C) / n * 100;
     print("Training Accuracy (%): " + acc);
   }
 }
	#-------------------------------------------------------------
	#
	# Licensed to the Apache Software Foundation (ASF) under one
	# or more contributor license agreements. See the NOTICE file
	# distributed with this work for additional information
	# regarding copyright ownership. The ASF licenses this file
	# to you under the Apache License, Version 2.0 (the
	# "License"); you may not use this file except in compliance
	# with the License. You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing,
	# software distributed under the License is distributed on an
	# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
	# KIND, either express or implied. See the License for the
	# specific language governing permissions and limitations
	# under the License.
	#
	#-------------------------------------------------------------

	# Naive Bayes training routine.
	#
	# Inputs:
	#   D        matrix of feature values, one training example per row
	#   C        column of class labels aligned with the rows of D; the labels
	#            serve as group ids (assumed to be integers 1..max(C) - TODO confirm)
	#   laplace  smoothing term added to each per-class feature total
	#   verbose  when TRUE, the training-set accuracy is printed
	#
	# Outputs:
	#   prior              share of training rows belonging to each class
	#   classConditionals  per-class feature probabilities after smoothing
	m_naivebayes = function(Matrix[Double] D, Matrix[Double] C, Double laplace = 1, Boolean verbose = TRUE)
	return (Matrix[Double] prior, Matrix[Double] classConditionals)
	{
	numExamples = nrow(D);
	numGroups = as.integer(max(C));

	# Sum the feature values of all rows that share a class label
	perClassTotals = aggregate(target=D, groups=C, fn="sum", ngroups=numGroups);

	# Laplace's rule: the numerator gains the smoothing term once per cell,
	# the denominator gains it once per feature
	numerator = perClassTotals + laplace;
	denominator = rowSums(perClassTotals) + ncol(D)*laplace;
	classConditionals = numerator / denominator;

	# Relative class frequencies
	perClassRows = aggregate(target=C, groups=C, fn="count", ngroups=numGroups);
	prior = perClassRows / numExamples;

	# Report how well the fitted model reproduces the training labels
	if( verbose ) {
	logLikelihood = D %*% t(log(classConditionals));
	logPosterior = logLikelihood + t(log(prior));
	numCorrect = sum(rowIndexMax(logPosterior) == C);
	acc = numCorrect / numExamples * 100;
	print("Training Accuracy (%): " + acc);
	}
	}