process/engines/itemrec/algorithms/hadoop/scalding/randomrank/src/test/scala/io/prediction/algorithms/scalding/itemrec/randomrank/RandomRankTest.scala - predictionio - Git at Google

 package io.prediction.algorithms.scalding.itemrec.randomrank

 import org.specs2.mutable._

 import com.twitter.scalding._

 import io.prediction.commons.scalding.appdata.{ Items, Users }
 import io.prediction.commons.scalding.modeldata.{ ItemRecScores }
 import io.prediction.commons.filepath.{ AlgoFile }

 class RandomRankTest extends Specification with TupleConversions {

   def test(algoid: Int, modelSet: Boolean,
     itypes: List[String],
     numRecommendations: Int,
     items: List[(String, String)],
     users: List[(String, String)],
     itemRecScores: List[(String, String, String, String, Int, Boolean)]) = {

     val training_dbType = "file"
     val training_dbName = "testpath/"

     val modeldata_dbType = "file"
     val modeldata_dbName = "testpath/"

     val hdfsRoot = "testpath/"

     val appid = 7
     val engineid = 10
     val evalid = None

     JobTest("io.prediction.algorithms.scalding.itemrec.randomrank.RandomRank")
       .arg("training_dbType", training_dbType)
       .arg("training_dbName", training_dbName)
       .arg("modeldata_dbType", modeldata_dbType)
       .arg("modeldata_dbName", modeldata_dbName)
       .arg("hdfsRoot", hdfsRoot)
       .arg("appid", appid.toString)
       .arg("engineid", engineid.toString)
       .arg("algoid", algoid.toString)
       .arg("itypes", itypes)
       .arg("numRecommendations", numRecommendations.toString)
       .arg("modelSet", modelSet.toString)
       .source(Items(appId = appid, itypes = Some(itypes),
         dbType = training_dbType, dbName = training_dbName, dbHost = None, dbPort = None).getSource, items)
       .source(Users(appId = appid,
         dbType = training_dbType, dbName = training_dbName, dbHost = None, dbPort = None).getSource, users)
       /*
       .sink[(String, String)](Tsv(AlgoFile(hdfsRoot, appid, engineid, algoid, evalid, "itemRecScores.tsv"))) { outputBuffer =>

         def takeOutScoresAndItypes(d: List[(String, String, Double, String, Int, Boolean)]) = {
           d map {x => (x._1, x._2) }
         }

         "generate correct user and item pairs in AlgoFile" in {
           val result = outputBuffer.toList
           val expected = takeOutScoresAndItypes(itemRecScores)

           result must containTheSameElementsAs(expected)
         }

       }*/
       .sink[(String, String, String, String, Int, Boolean)](ItemRecScores(dbType = modeldata_dbType, dbName = modeldata_dbName, dbHost = None, dbPort = None, algoid = algoid, modelset = modelSet).getSource) { outputBuffer =>

         def takeOutScores(d: List[(String, String, String, String, Int, Boolean)]) = {
           // don't check score and itypes.
           // for iids, don't check order. convert to set
           d map { x => (x._1, x._2.split(",").toSet, x._5, x._6) }
         }

         def getScoresOnly(d: List[(String, String, String, String, Int, Boolean)]) = {
           d flatMap { x => x._3.split(",").toList.map(_.toDouble) }
         }

         def getIids(d: List[(String, String, String, String, Int, Boolean)]) = {
           // List(List("i0", "i1"), List("i1", "i0"), ...)

           d map { x => x._2.split(",").toList }
         }

         def getItypes(d: List[(String, String, String, String, Int, Boolean)]) = {
           //("u0", "i0,i1", "0.0,0.0", "[t1,t2,t3],[t2,t3]", algoid, modelSet),
           // => List( List( (i0, List(t1,t2,t3)) , (i1, List(t2,t3)) )
           d.map { x =>
             val itypesList = x._4.split("],").toList.map(x => x.stripPrefix("[").stripSuffix("]").split(",").toList)
             val iidList = x._2.split(",").toList

             iidList zip itypesList
           }
         }

         "generate correct user and item pairs in modeldata" in {

           // don't check scores since they are random
           val result = takeOutScores(outputBuffer.toList)
           val expected = takeOutScores(itemRecScores)

           result must containTheSameElementsAs(expected)

         }

         "generate different scores for each pair in modeldata" in {
           // very simple way to check if the scores are random
           // (just to check if they are different)

           val scoresList: List[Double] = getScoresOnly(outputBuffer.toList)
           val scoresSet = scoresList.toSet

           scoresSet.size must be_==(scoresList.size)

         }

         "not generate same order of iid for all uid group" in {

           getIids(outputBuffer.toList).toSet.size must be_>(1)

         }

         "itypes order match the iids order" in {

           // extract (iid, itypes) from the output
           val itypesList = getItypes(outputBuffer.toList)
           val itemsMap = items.toMap

           // use the iid only and contruct the (iid, itypes)
           val expected = getIids(outputBuffer.toList).map(x =>
             // x is List of iid
             // create the List of item types using the iid
             x.map(x => (x, itemsMap(x).split(",").toList))
           )

           itypesList must be_==(expected)

         }

       }
       .run
       .finish
   }

   "randomrank.RandomRank with selected itypes" should {

     val algoid = 12
     val modelSet = false
     val itypes = List("t1", "t2")
     val items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
     val users = List(("u0", "3"), ("u1", "3"), ("u2", "3"), ("u3", "3"))
     val itemRecScores = List(
       ("u0", "i0,i1", "0.0,0.0", "[t1,t2,t3],[t2,t3]", algoid, modelSet),
       ("u1", "i0,i1", "0.0,0.0", "[t1,t2,t3],[t2,t3]", algoid, modelSet),
       ("u2", "i0,i1", "0.0,0.0", "[t1,t2,t3],[t2,t3]", algoid, modelSet),
       ("u3", "i0,i1", "0.0,0.0", "[t1,t2,t3],[t2,t3]", algoid, modelSet))

     test(algoid, modelSet, itypes, 500, items, users, itemRecScores)

   }

   "randomrank.RandomRank with all itypes" should {

     val algoid = 12
     val modelSet = false
     val itypes = List("")
     val items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
     val users = List(("u0", "3"), ("u1", "3"), ("u2", "3"), ("u3", "3"))
     val itemRecScores = List(
       ("u0", "i0,i1,i2,i3", "0.0,0.0,0.0,0.0", "[t1,t2,t3],[t2,t3],[t4],[t3,t4]", algoid, modelSet),
       ("u1", "i0,i1,i2,i3", "0.0,0.0,0.0,0.0", "[t1,t2,t3],[t2,t3],[t4],[t3,t4]", algoid, modelSet),
       ("u2", "i0,i1,i2,i3", "0.0,0.0,0.0,0.0", "[t1,t2,t3],[t2,t3],[t4],[t3,t4]", algoid, modelSet),
       ("u3", "i0,i1,i2,i3", "0.0,0.0,0.0,0.0", "[t1,t2,t3],[t2,t3],[t4],[t3,t4]", algoid, modelSet))

     test(algoid, modelSet, itypes, 500, items, users, itemRecScores)

   }

   // TODO: test with smaller number of numRecommendations (but can't know expected result beacause the score is random...)

 }
	package io.prediction.algorithms.scalding.itemrec.randomrank

	import org.specs2.mutable._

	import com.twitter.scalding._

	import io.prediction.commons.scalding.appdata.{ Items, Users }
	import io.prediction.commons.scalding.modeldata.{ ItemRecScores }
	import io.prediction.commons.filepath.{ AlgoFile }

	class RandomRankTest extends Specification with TupleConversions {

	def test(algoid: Int, modelSet: Boolean,
	itypes: List[String],
	numRecommendations: Int,
	items: List[(String, String)],
	users: List[(String, String)],
	itemRecScores: List[(String, String, String, String, Int, Boolean)]) = {

	val training_dbType = "file"
	val training_dbName = "testpath/"

	val modeldata_dbType = "file"
	val modeldata_dbName = "testpath/"

	val hdfsRoot = "testpath/"

	val appid = 7
	val engineid = 10
	val evalid = None

	JobTest("io.prediction.algorithms.scalding.itemrec.randomrank.RandomRank")
	.arg("training_dbType", training_dbType)
	.arg("training_dbName", training_dbName)
	.arg("modeldata_dbType", modeldata_dbType)
	.arg("modeldata_dbName", modeldata_dbName)
	.arg("hdfsRoot", hdfsRoot)
	.arg("appid", appid.toString)
	.arg("engineid", engineid.toString)
	.arg("algoid", algoid.toString)
	.arg("itypes", itypes)
	.arg("numRecommendations", numRecommendations.toString)
	.arg("modelSet", modelSet.toString)
	.source(Items(appId = appid, itypes = Some(itypes),
	dbType = training_dbType, dbName = training_dbName, dbHost = None, dbPort = None).getSource, items)
	.source(Users(appId = appid,
	dbType = training_dbType, dbName = training_dbName, dbHost = None, dbPort = None).getSource, users)
	/*
	.sink[(String, String)](Tsv(AlgoFile(hdfsRoot, appid, engineid, algoid, evalid, "itemRecScores.tsv"))) { outputBuffer =>

	def takeOutScoresAndItypes(d: List[(String, String, Double, String, Int, Boolean)]) = {
	d map {x => (x._1, x._2) }
	}

	"generate correct user and item pairs in AlgoFile" in {
	val result = outputBuffer.toList
	val expected = takeOutScoresAndItypes(itemRecScores)

	result must containTheSameElementsAs(expected)
	}

	}*/
	.sink[(String, String, String, String, Int, Boolean)](ItemRecScores(dbType = modeldata_dbType, dbName = modeldata_dbName, dbHost = None, dbPort = None, algoid = algoid, modelset = modelSet).getSource) { outputBuffer =>

	def takeOutScores(d: List[(String, String, String, String, Int, Boolean)]) = {
	// don't check score and itypes.
	// for iids, don't check order. convert to set
	d map { x => (x._1, x._2.split(",").toSet, x._5, x._6) }
	}

	def getScoresOnly(d: List[(String, String, String, String, Int, Boolean)]) = {
	d flatMap { x => x._3.split(",").toList.map(_.toDouble) }
	}

	def getIids(d: List[(String, String, String, String, Int, Boolean)]) = {
	// List(List("i0", "i1"), List("i1", "i0"), ...)

	d map { x => x._2.split(",").toList }
	}

	def getItypes(d: List[(String, String, String, String, Int, Boolean)]) = {
	//("u0", "i0,i1", "0.0,0.0", "[t1,t2,t3],[t2,t3]", algoid, modelSet),
	// => List( List( (i0, List(t1,t2,t3)) , (i1, List(t2,t3)) )
	d.map { x =>
	val itypesList = x._4.split("],").toList.map(x => x.stripPrefix("[").stripSuffix("]").split(",").toList)
	val iidList = x._2.split(",").toList

	iidList zip itypesList
	}
	}

	"generate correct user and item pairs in modeldata" in {

	// don't check scores since they are random
	val result = takeOutScores(outputBuffer.toList)
	val expected = takeOutScores(itemRecScores)

	result must containTheSameElementsAs(expected)

	}

	"generate different scores for each pair in modeldata" in {
	// very simple way to check if the scores are random
	// (just to check if they are different)

	val scoresList: List[Double] = getScoresOnly(outputBuffer.toList)
	val scoresSet = scoresList.toSet

	scoresSet.size must be_==(scoresList.size)

	}

	"not generate same order of iid for all uid group" in {

	getIids(outputBuffer.toList).toSet.size must be_>(1)

	}

	"itypes order match the iids order" in {

	// extract (iid, itypes) from the output
	val itypesList = getItypes(outputBuffer.toList)
	val itemsMap = items.toMap

	// use the iid only and contruct the (iid, itypes)
	val expected = getIids(outputBuffer.toList).map(x =>
	// x is List of iid
	// create the List of item types using the iid
	x.map(x => (x, itemsMap(x).split(",").toList))
	)

	itypesList must be_==(expected)

	}

	}
	.run
	.finish
	}

	"randomrank.RandomRank with selected itypes" should {

	val algoid = 12
	val modelSet = false
	val itypes = List("t1", "t2")
	val items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
	val users = List(("u0", "3"), ("u1", "3"), ("u2", "3"), ("u3", "3"))
	val itemRecScores = List(
	("u0", "i0,i1", "0.0,0.0", "[t1,t2,t3],[t2,t3]", algoid, modelSet),
	("u1", "i0,i1", "0.0,0.0", "[t1,t2,t3],[t2,t3]", algoid, modelSet),
	("u2", "i0,i1", "0.0,0.0", "[t1,t2,t3],[t2,t3]", algoid, modelSet),
	("u3", "i0,i1", "0.0,0.0", "[t1,t2,t3],[t2,t3]", algoid, modelSet))

	test(algoid, modelSet, itypes, 500, items, users, itemRecScores)

	}

	"randomrank.RandomRank with all itypes" should {

	val algoid = 12
	val modelSet = false
	val itypes = List("")
	val items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
	val users = List(("u0", "3"), ("u1", "3"), ("u2", "3"), ("u3", "3"))
	val itemRecScores = List(
	("u0", "i0,i1,i2,i3", "0.0,0.0,0.0,0.0", "[t1,t2,t3],[t2,t3],[t4],[t3,t4]", algoid, modelSet),
	("u1", "i0,i1,i2,i3", "0.0,0.0,0.0,0.0", "[t1,t2,t3],[t2,t3],[t4],[t3,t4]", algoid, modelSet),
	("u2", "i0,i1,i2,i3", "0.0,0.0,0.0,0.0", "[t1,t2,t3],[t2,t3],[t4],[t3,t4]", algoid, modelSet),
	("u3", "i0,i1,i2,i3", "0.0,0.0,0.0,0.0", "[t1,t2,t3],[t2,t3],[t4],[t3,t4]", algoid, modelSet))

	test(algoid, modelSet, itypes, 500, items, users, itemRecScores)

	}

	// TODO: test with smaller number of numRecommendations (but can't know expected result beacause the score is random...)

	}