blob: 4b542b33fa35e41156a5fe96f027b5a4d0b15f1d [file] [log] [blame]
package io.prediction.algorithms.scalding.itemsim.latestrank
import org.specs2.mutable._
import com.twitter.scalding._
import io.prediction.commons.scalding.appdata.Items
import io.prediction.commons.scalding.modeldata.{ItemSimScores}
import io.prediction.commons.filepath.{AlgoFile}
class LatestRankTest extends Specification with TupleConversions {
def test(
algoid: Int,
modelSet: Boolean,
itypes: List[String],
numSimilarItems: Int,
recommendationTime: Long,
items: List[(String, String, String, String, String, String)], // id, itypes, appid, starttime, ct, endtime
itemSimScores: List[(String, String, String, String, Int, Boolean)]) = {
val training_dbType = "file"
val training_dbName = "testpath/"
val modeldata_dbType = "file"
val modeldata_dbName = "testpath/"
val hdfsRoot = "testpath/"
val appid = 7
val engineid = 10
val evalid = None
JobTest("io.prediction.algorithms.scalding.itemsim.latestrank.LatestRank")
.arg("training_dbType", training_dbType)
.arg("training_dbName", training_dbName)
.arg("modeldata_dbType", modeldata_dbType)
.arg("modeldata_dbName", modeldata_dbName)
.arg("hdfsRoot", hdfsRoot)
.arg("appid", appid.toString)
.arg("engineid", engineid.toString)
.arg("algoid", algoid.toString)
.arg("itypes", itypes)
.arg("numSimilarItems", numSimilarItems.toString)
.arg("modelSet", modelSet.toString)
.arg("recommendationTime", recommendationTime.toString)
.source(Items(appId=appid, itypes=Some(itypes), dbType=training_dbType, dbName=training_dbName, dbHost=None, dbPort=None).getSource, items)
.sink[(String, String, String, String, Int, Boolean)](ItemSimScores(dbType=modeldata_dbType, dbName=modeldata_dbName, dbHost=None, dbPort=None, algoid=algoid, modelset=modelSet).getSource) { outputBuffer =>
"correctly write ItemSimScores" in {
val outputList = outputBuffer.toList
// convert score to double and compare
// because double have different string representation
// eg 9.87654321E9 vs 9876543210.0
val outputListDouble = outputList.map { x => (x._1, x._2, x._3.split(",").toList.map(_.toDouble), x._4, x._5, x._6) }
val expectedDouble = itemSimScores.map { x => (x._1, x._2, x._3.split(",").toList.map(_.toDouble), x._4, x._5, x._6) }
outputListDouble must containTheSameElementsAs(expectedDouble)
}
}
.run
.finish
}
val largeNumber: Long = scala.Long.MaxValue // larger than any item starttime
val noEndtime = "PIO_NONE"
/* test 1 */
val algoid = 12
val modelSet = false
val itypesT1T2 = List("t1", "t2")
val itypesAll = List("t1", "t2", "t3", "t4")
val items = List(
("i0", "t1,t2,t3", "19", "123456", "345678", noEndtime),
("i1", "t2,t3", "19", "123457", "567890", noEndtime),
("i2", "t4", "19", "21", "88", noEndtime),
("i3", "t3,t4", "19", "9876543210", "67890", noEndtime))
val itemSimScoresT1T2 = List(
("i3", "i1,i0", "123457.0,123456.0", "[t2,t3],[t1,t2,t3]", algoid, modelSet),
("i2", "i1,i0", "123457.0,123456.0", "[t2,t3],[t1,t2,t3]", algoid, modelSet),
("i1", "i0", "123456.0", "[t1,t2,t3]", algoid, modelSet),
("i0", "i1", "123457.0", "[t2,t3]", algoid, modelSet))
val itemSimScoresAll = List(
("i3", "i1,i0,i2", "123457.0,123456.0,21.0", "[t2,t3],[t1,t2,t3],[t4]", algoid, modelSet),
("i2", "i3,i1,i0", "9876543210.0,123457.0,123456.0", "[t3,t4],[t2,t3],[t1,t2,t3]", algoid, modelSet),
("i1", "i3,i0,i2", "9876543210.0,123456.0,21.0", "[t3,t4],[t1,t2,t3],[t4]", algoid, modelSet),
("i0", "i3,i1,i2", "9876543210.0,123457.0,21.0", "[t3,t4],[t2,t3],[t4]", algoid, modelSet))
val itemSimScoresAllTop2 = List(
("i3", "i1,i0", "123457.0,123456.0", "[t2,t3],[t1,t2,t3]", algoid, modelSet),
("i2", "i3,i1", "9876543210.0,123457.0", "[t3,t4],[t2,t3]", algoid, modelSet),
("i1", "i3,i0", "9876543210.0,123456.0", "[t3,t4],[t1,t2,t3]", algoid, modelSet),
("i0", "i3,i1", "9876543210.0,123457.0", "[t3,t4],[t2,t3]", algoid, modelSet))
"latestrank.LatestRank with some itypes and numSimilarItems larger than number of items" should {
test(algoid, modelSet, itypesT1T2, 500, largeNumber, items, itemSimScoresT1T2)
}
"latestrank.LatestRank with all itypes and numSimilarItems larger than number of items" should {
test(algoid, modelSet, itypesAll, 500, largeNumber, items, itemSimScoresAll)
}
"latestrank.LatestRank with all itypes numSimilarItems smaller than number of items" should {
test(algoid, modelSet, itypesAll, 2, largeNumber, items, itemSimScoresAllTop2)
}
/* test 2: test starttime and endtime */
// starttime, endtime
// i0 A |---------|
// i1 B |---------|E
// i2 C|---------|
// i3 |---------|
// D F G
val tA = 123122
val tB = 123123
val tC = 123457
val tD = 123679
val tE = 543322
val tF = 543654
val tG = 543655
val test2Algoid = 12
val test2ModelSet = false
val test2ItypesAll = List("t1", "t2", "t3", "t4")
val test2Items = List(
("i0", "t1,t2,t3", "19", "123123", "4", "543210"),
("i1", "t2,t3", "19", "123456", "5", "543321"),
("i2", "t4", "19", "123567", "6", "543432"),
("i3", "t3,t4", "19", "123678", "7", "543654"))
val test2Users = List(("u0", "3"), ("u1", "3"), ("u2", "3"), ("u3", "3"))
val test2ItemSimScoresAll = List(
("i0", "i3,i2,i1", "123678.0,123567.0,123456.0", "[t3,t4],[t4],[t2,t3]", test2Algoid, test2ModelSet),
("i1", "i3,i2,i0", "123678.0,123567.0,123123.0", "[t3,t4],[t4],[t1,t2,t3]", test2Algoid, test2ModelSet),
("i2", "i3,i1,i0", "123678.0,123456.0,123123.0", "[t3,t4],[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet),
("i3", "i2,i1,i0", "123567.0,123456.0,123123.0", "[t4],[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet))
val test2ItemSimScoresEmpty = List()
val test2ItemSimScoresi0 = List(
("i1", "i0", "123123.0", "[t1,t2,t3]", test2Algoid, test2ModelSet),
("i2", "i0", "123123.0", "[t1,t2,t3]", test2Algoid, test2ModelSet),
("i3", "i0", "123123.0", "[t1,t2,t3]", test2Algoid, test2ModelSet))
val test2ItemSimScoresi0i1 = List(
("i0", "i1", "123456.0", "[t2,t3]", test2Algoid, test2ModelSet),
("i1", "i0", "123123.0", "[t1,t2,t3]", test2Algoid, test2ModelSet),
("i2", "i1,i0", "123456.0,123123.0", "[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet),
("i3", "i1,i0", "123456.0,123123.0", "[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet))
val test2ItemSimScoresi2i3 = List(
("i0", "i3,i2", "123678.0,123567.0", "[t3,t4],[t4]", test2Algoid, test2ModelSet),
("i1", "i3,i2", "123678.0,123567.0", "[t3,t4],[t4]", test2Algoid, test2ModelSet),
("i2", "i3", "123678.0", "[t3,t4]", test2Algoid, test2ModelSet),
("i3", "i2", "123567.0", "[t4]", test2Algoid, test2ModelSet))
"recommendationTime < all item starttime" should {
test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tA, test2Items, test2ItemSimScoresEmpty)
}
"recommendationTime == earliest starttime" should {
test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tB, test2Items, test2ItemSimScoresi0)
}
"recommendationTime > some items starttime" should {
test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tC, test2Items, test2ItemSimScoresi0i1)
}
"recommendationTime > all item starttime and < all item endtime" should {
test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tD, test2Items, test2ItemSimScoresAll)
}
"recommendationTime > some item endtime" should {
test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tE, test2Items, test2ItemSimScoresi2i3)
}
"recommendationTime == last item endtime" should {
test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tA, test2Items, test2ItemSimScoresEmpty)
}
"recommendationTime > last item endtime" should {
test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tA, test2Items, test2ItemSimScoresEmpty)
}
}