mnemonic-benches/mnemonic-spark-kmeans-bench/src/main/scala/org/apache/mnemonic/bench/RegularKMeans.scala - mnemonic - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *    http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 /*
  * A copy of Apache Spark KMeansSample.scala @52facb0062a4253fa45ac0c633d0510a9b684a62
  */

 // scalastyle:off println
 package org.apache.mnemonic.bench

 import org.apache.spark.{SparkConf, SparkContext}
 // $example on$
 import org.apache.spark.mllib.clustering.{KMeans, KMeansModel}
 import org.apache.spark.mllib.linalg.Vectors
 // $example off$

 /**
  * K-means benchmark driver built on Spark MLlib.
  *
  * Reads a text file of space-separated numeric rows, trains a two-cluster
  * k-means model on it, and prints the clustering cost, the number of parsed
  * points and the elapsed wall-clock time.
  */
 object RegularKMeans {

   /**
    * Program entry point.
    *
    * Exits with status 1 when no argument is supplied. The SparkContext is
    * always stopped, even when loading, parsing or training throws.
    *
    * @param args command-line arguments; `args(0)` is the input path handed
    *             to `SparkContext.textFile`
    */
   def main(args: Array[String]): Unit = {

     if (args.length == 0) {
       println("no input file name")
       System.exit(1)
     }

     val conf = new SparkConf().setAppName("RegularKMeans")
     val sc = new SparkContext(conf)

     // Release the context on every exit path, not only on success.
     try {
       val start = System.currentTimeMillis()

       // $example on$
       // Load and parse the data: one dense vector per line, values separated by a single space
       val data = sc.textFile(args(0))
       // NOTE(review): the cached variant is kept commented out, so every action below
       // re-reads and re-parses the input; presumably deliberate for the benchmark - confirm.
       //val parsedData = data.map(s => Vectors.dense(s.split(' ').map(_.toDouble))).cache()
       val parsedData = data.map(s => Vectors.dense(s.split(' ').map(_.toDouble)))

       // Cluster the data into two classes using KMeans
       val numClusters = 2
       val numIterations = 20
       val clusters = KMeans.train(parsedData, numClusters, numIterations)

       // Evaluate clustering by computing Within Set Sum of Squared Errors
       val WSSSE = clusters.computeCost(parsedData)

       // The timed region covers loading, training and cost evaluation,
       // but not the verification count printed below.
       val totalTime = System.currentTimeMillis() - start

       println("Within Set Sum of Squared Errors = " + WSSSE)

       println("Total count of processed points = " + parsedData.count()) // verify code

       println("Elapsed time: %1d s".format(totalTime / 1000))

       // Save and load model
       //clusters.save(sc, "target/org/apache/spark/KMeansExample/KMeansModel")
       //val sameModel = KMeansModel.load(sc, "target/org/apache/spark/KMeansExample/KMeansModel")
       // $example off$
     } finally {
       sc.stop()
     }
   }
 }
 // scalastyle:on println
	/*
	* Licensed to the Apache Software Foundation (ASF) under one or more
	* contributor license agreements. See the NOTICE file distributed with
	* this work for additional information regarding copyright ownership.
	* The ASF licenses this file to You under the Apache License, Version 2.0
	* (the "License"); you may not use this file except in compliance with
	* the License. You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	/*
	* A copy of Apache Spark KMeansSample.scala @52facb0062a4253fa45ac0c633d0510a9b684a62
	*/

	// scalastyle:off println
	package org.apache.mnemonic.bench

	import org.apache.spark.{SparkConf, SparkContext}
	// $example on$
	import org.apache.spark.mllib.clustering.{KMeans, KMeansModel}
	import org.apache.spark.mllib.linalg.Vectors
	// $example off$

	/**
	 * K-means benchmark driver built on Spark MLlib.
	 *
	 * Reads a text file of space-separated numeric rows, trains a two-cluster
	 * k-means model on it, and prints the clustering cost, the number of parsed
	 * points and the elapsed wall-clock time.
	 */
	object RegularKMeans {

	  /**
	   * Program entry point.
	   *
	   * Exits with status 1 when no argument is supplied. The SparkContext is
	   * always stopped, even when loading, parsing or training throws.
	   *
	   * @param args command-line arguments; `args(0)` is the input path handed
	   *             to `SparkContext.textFile`
	   */
	  def main(args: Array[String]): Unit = {

	    if (args.length == 0) {
	      println("no input file name")
	      System.exit(1)
	    }

	    val conf = new SparkConf().setAppName("RegularKMeans")
	    val sc = new SparkContext(conf)

	    // Release the context on every exit path, not only on success.
	    try {
	      val start = System.currentTimeMillis()

	      // $example on$
	      // Load and parse the data: one dense vector per line, values separated by a single space
	      val data = sc.textFile(args(0))
	      // NOTE(review): the cached variant is kept commented out, so every action below
	      // re-reads and re-parses the input; presumably deliberate for the benchmark - confirm.
	      //val parsedData = data.map(s => Vectors.dense(s.split(' ').map(_.toDouble))).cache()
	      val parsedData = data.map(s => Vectors.dense(s.split(' ').map(_.toDouble)))

	      // Cluster the data into two classes using KMeans
	      val numClusters = 2
	      val numIterations = 20
	      val clusters = KMeans.train(parsedData, numClusters, numIterations)

	      // Evaluate clustering by computing Within Set Sum of Squared Errors
	      val WSSSE = clusters.computeCost(parsedData)

	      // The timed region covers loading, training and cost evaluation,
	      // but not the verification count printed below.
	      val totalTime = System.currentTimeMillis() - start

	      println("Within Set Sum of Squared Errors = " + WSSSE)

	      println("Total count of processed points = " + parsedData.count()) // verify code

	      println("Elapsed time: %1d s".format(totalTime / 1000))

	      // Save and load model
	      //clusters.save(sc, "target/org/apache/spark/KMeansExample/KMeansModel")
	      //val sameModel = KMeansModel.load(sc, "target/org/apache/spark/KMeansExample/KMeansModel")
	      // $example off$
	    } finally {
	      sc.stop()
	    }
	  }
	}
	// scalastyle:on println