src/main/scala/org/template/textclassification/NBModel.scala - predictionio-template-text-classifier - Git at Google

 package org.template.textclassification

 import org.apache.spark.mllib.classification.NaiveBayes
 import org.apache.spark.mllib.classification.NaiveBayesModel

 import scala.math.exp

 class NBModel(
   val pd: PreparedData,
   lambda: Double
 ) extends Serializable {


   // 1. Fit a Naive Bayes model using the prepared data.

   private val nb : NaiveBayesModel = NaiveBayes.train(
     pd.transformData, lambda)


   // 2. Set up framework for performing the required Matrix
   // Multiplication for the prediction rule explained in the
   // tutorial.

   private def innerProduct (x : Array[Double], y : Array[Double]) : Double = {
     require(x.length == y.length)

     x.zip(y).map(e => e._1 * e._2).sum
   }


   // 3. Given a document string, return a vector of corresponding
   // class membership probabilities.

   private def getScores(doc: String): Array[Double] = {
     // Helper function used to normalize probability scores.
     // Returns an object of type Array[Double]
     val normalize = (u: Array[Double]) => u.map(_ / u.sum)
     // Vectorize query,
     val x: Array[Double] = pd.transform(doc).toArray

     normalize(
       nb.pi
         .zip(nb.theta)
         .map(
           e => exp(innerProduct(e._2, x) + e._1))
     )
   }


   // 4. Implement predict method for our model using
   // the prediction rule given in tutorial.

   def predict(doc : String) : PredictedResult = {
     val x: Array[Double] = getScores(doc)
     val y: (Double, Double) = (nb.labels zip x).maxBy(_._2)
     new PredictedResult(pd.categoryMap.getOrElse(y._1, ""), y._2)
   }
 }
	package org.template.textclassification

	import org.apache.spark.mllib.classification.NaiveBayes
	import org.apache.spark.mllib.classification.NaiveBayesModel

	import scala.math.exp

	class NBModel(
	val pd: PreparedData,
	lambda: Double
	) extends Serializable {



	// 1. Fit a Naive Bayes model using the prepared data.

	private val nb : NaiveBayesModel = NaiveBayes.train(
	pd.transformData, lambda)



	// 2. Set up framework for performing the required Matrix
	// Multiplication for the prediction rule explained in the
	// tutorial.

	private def innerProduct (x : Array[Double], y : Array[Double]) : Double = {
	require(x.length == y.length)

	x.zip(y).map(e => e._1 * e._2).sum
	}



	// 3. Given a document string, return a vector of corresponding
	// class membership probabilities.

	private def getScores(doc: String): Array[Double] = {
	// Helper function used to normalize probability scores.
	// Returns an object of type Array[Double]
	val normalize = (u: Array[Double]) => u.map(_ / u.sum)
	// Vectorize query,
	val x: Array[Double] = pd.transform(doc).toArray

	normalize(
	nb.pi
	.zip(nb.theta)
	.map(
	e => exp(innerProduct(e._2, x) + e._1))
	)
	}



	// 4. Implement predict method for our model using
	// the prediction rule given in tutorial.

	def predict(doc : String) : PredictedResult = {
	val x: Array[Double] = getScores(doc)
	val y: (Double, Double) = (nb.labels zip x).maxBy(_._2)
	new PredictedResult(pd.categoryMap.getOrElse(y._1, ""), y._2)
	}
	}