// blob: 867ef75a774568c7e31c4bad44a883e8ac9bae9c [file] [log] [blame]
package io.prediction.algorithms.graphchi.itemrec
import io.prediction.commons.Config
import io.prediction.commons.settings.{ App, Algo }
import io.prediction.commons.modeldata.{ ItemRecScore }
import org.specs2.mutable._
import com.github.nscala_time.time.Imports._
import scala.io.Source
import java.io.File
import java.io.FileWriter
import java.io.BufferedWriter
import com.mongodb.casbah.Imports._
class GraphChiModelConstructorSpec extends Specification {
// Name of the MongoDB database used by this spec; it has to stay in sync with
// the db name defined in application.conf.
val mongoDbName = "predictionio_modeldata_graphchi_dataprep_test"

// Settings handle and the ItemRecScore accessor obtained from it; the examples
// use the accessor to look up ItemRecScores by user id.
val commonConfig = new Config
val modeldataItemRecScores = commonConfig.getModeldataItemRecScores

// Drops the whole test database.
def cleanUp() = {
  val testDb = MongoConnection()(mongoDbName)
  testDb.dropDatabase()
}
// Returns a copy of `irec` whose scores are rounded to 9 decimal places with
// HALF_UP, so that comparing stored and expected data is not thrown off by
// floating-point noise
// (e.g. 3.5 vs 3.499999999999, or 0.6666666666 vs 0.666666667).
def roundUpScores(irec: ItemRecScore): ItemRecScore = {
  def roundScore(score: Double): Double =
    BigDecimal(score).setScale(9, BigDecimal.RoundingMode.HALF_UP).toDouble

  irec.copy(scores = irec.scores.map(roundScore))
}
// Flattens an argument map into command-line form: every entry becomes the
// two consecutive tokens "--<key>" and "<value.toString>", in the map's
// iteration order.
def argMapToArray(args: Map[String, Any]): Array[String] = {
  val tokens = for {
    (name, value) <- args.toSeq
    token <- Seq(s"--${name}", value.toString)
  } yield token
  tokens.toArray
}
"GraphChiModelConstructor" should {
// Directory that receives the fixture files; created up front.
val inputDir = "/tmp/pio_test/"
val inputDirFile = new File(inputDir)
inputDirFile.mkdirs()
// One line per user, tab-separated: numeric index, user id.
val usersIndex = List(
"1\tu1",
"2\tu2",
"3\tu3")
// One line per item, tab-separated: numeric index, item id, comma-separated
// item types.
val itemsIndex = List(
"1\ti1\tt1,t2",
"2\ti2\tt1",
"3\ti3\tt2,t3",
"4\ti4\tt3"
)
// Comma-separated index pairs. Judging by the unseenOnly=true expectations
// they read as "<user index>,<item index>" (u1 has seen i1, i2, i3; u2 has
// seen i1, i4; u3 has seen i2).
val seenCSV = List(
"1,1",
"1,2",
"1,3",
"2,1",
"2,4",
"3,2"
)
/*
 * User factors, one column per user (u1, u2, u3):
 * 1.2 2.4 1.1
 * 4.3 1.1 2.4
 * The value lines of the fixture below list this matrix column by column.
 */
val ratingsUMM = List(
"%%MatrixMarket matrix array real general",
"%This file contains ALS output matrix U. In each row D factors of a single user node.",
"3 2",
"1.2",
"4.3",
"2.4",
"1.1",
"1.1",
"2.4"
)
/*
 * Item factors, one column per item (i1, i2, i3, i4):
 * 2.1 3.1 2.6 1.9
 * 1.5 1.2 1.3 2.0
 * The value lines of the fixture below list this matrix column by column.
 */
val ratingsVMM = List(
"%%MatrixMarket matrix array real general",
"%This file contains ALS output matrix V. In each row D factors of a single item node.",
"4 2",
"2.1",
"1.5",
"3.1",
"1.2",
"2.6",
"1.3",
"1.9",
"2.0"
)
/* Expected scores, Ut V (rows: u1, u2, u3; columns: i1, i2, i3, i4),
 * e.g. u1/i1 = 1.2 * 2.1 + 4.3 * 1.5 = 8.97:
 * 8.97 8.88 8.71 10.88
 * 6.69 8.76 7.67 6.76
 * 5.91 6.29 5.98 6.89
 */
// Writes every element of `lines` to the file at `filePath`, terminating each
// one with "\n". The file is opened with a plain FileWriter, so existing
// content is replaced.
//
// The writer is closed in a `finally` block so the file handle is released
// even when one of the writes throws.
def writeToFile(lines: List[String], filePath: String): Unit = {
  val writer = new BufferedWriter(new FileWriter(new File(filePath)))
  try {
    lines.foreach { line =>
      writer.write(s"${line}\n")
    }
  } finally {
    writer.close()
  }
}
// Materialise every fixture as a file inside inputDir.
Seq(
  "usersIndex.tsv" -> usersIndex,
  "itemsIndex.tsv" -> itemsIndex,
  "seen.csv" -> seenCSV,
  "ratings.mm_U.mm" -> ratingsUMM,
  "ratings.mm_V.mm" -> ratingsVMM
).foreach {
  case (fileName, lines) => writeToFile(lines, inputDir + fileName)
}
// App fixture, picked up implicitly by the model data lookups below.
val appid = 24
implicit val app = App(
  id = appid,
  userid = 0,
  appkey = "1234",
  display = "12345",
  url = None,
  cat = None,
  desc = None,
  timezone = "UTC"
)

// Item types per item id, mirroring the third column of itemsIndex.
val itemTypes = Map(
  "i1" -> Seq("t1", "t2"),
  "i2" -> Seq("t1"),
  "i3" -> Seq("t2", "t3"),
  "i4" -> Seq("t3")
)

// Builds the Algo fixture; only the id and the model set differ between
// examples.
def algoFixture(algoid: Int, modelSet: Boolean): Algo = Algo(
  id = algoid,
  engineid = 1234,
  name = "",
  infoid = "abc",
  command = "",
  params = Map(),
  settings = Map(),
  modelset = modelSet,
  createtime = DateTime.now,
  updatetime = DateTime.now,
  status = "deployed",
  offlineevalid = None,
  offlinetuneid = None,
  loop = None,
  paramset = None
)

// Command-line arguments handed to GraphChiModelConstructor.main.
def constructorArgs(
  dir: String,
  algoid: Int,
  modelSet: Boolean,
  unseenOnly: Boolean,
  numRecommendations: Int): Array[String] =
  argMapToArray(Map(
    "inputDir" -> dir,
    "appid" -> appid,
    "algoid" -> algoid,
    "modelSet" -> modelSet,
    "unseenOnly" -> unseenOnly,
    "numRecommendations" -> numRecommendations
  ))

// Expected ItemRecScore for `uid`. `ranked` lists (item id, score) pairs in
// the expected order; the item types are looked up in itemTypes.
def expectedScore(
  uid: String,
  algoid: Int,
  modelSet: Boolean,
  ranked: Seq[(String, Double)]): ItemRecScore =
  ItemRecScore(
    uid = uid,
    iids = ranked.map(_._1),
    scores = ranked.map(_._2),
    itypes = ranked.map { case (iid, _) => itemTypes(iid) },
    appid = appid,
    algoid = algoid,
    modelset = modelSet)

// Looks up the stored ItemRecScore of the expected user and compares it with
// `expected` after rounding the scores on both sides. The id is not checked.
def storedMustEqual(expected: ItemRecScore)(implicit algo: Algo) =
  modeldataItemRecScores.getByUid(expected.uid).map(roundUpScores(_).copy(id = None)) must
    beSome(roundUpScores(expected))

"correctly writes ItemRecScores with larger numRecommendations" in {
  val algoid = 25
  val modelSet = false
  implicit val algo: Algo = algoFixture(algoid, modelSet)

  val u1Expected = expectedScore("u1", algoid, modelSet,
    Seq("i4" -> 10.88, "i1" -> 8.97, "i2" -> 8.88, "i3" -> 8.71))
  val u2Expected = expectedScore("u2", algoid, modelSet,
    Seq("i2" -> 8.76, "i3" -> 7.67, "i4" -> 6.76, "i1" -> 6.69))
  val u3Expected = expectedScore("u3", algoid, modelSet,
    Seq("i4" -> 6.89, "i2" -> 6.29, "i3" -> 5.98, "i1" -> 5.91))

  GraphChiModelConstructor.main(
    constructorArgs(inputDir, algoid, modelSet, unseenOnly = false, numRecommendations = 5))

  (storedMustEqual(u1Expected)) and
    (storedMustEqual(u2Expected)) and
    (storedMustEqual(u3Expected))
}

"correctly writes ItemRecScores with smaller numRecommendations" in {
  val algoid = 26
  val modelSet = true
  implicit val algo: Algo = algoFixture(algoid, modelSet)

  val u1Expected = expectedScore("u1", algoid, modelSet,
    Seq("i4" -> 10.88, "i1" -> 8.97))
  val u2Expected = expectedScore("u2", algoid, modelSet,
    Seq("i2" -> 8.76, "i3" -> 7.67))
  val u3Expected = expectedScore("u3", algoid, modelSet,
    Seq("i4" -> 6.89, "i2" -> 6.29))

  GraphChiModelConstructor.main(
    constructorArgs(inputDir, algoid, modelSet, unseenOnly = false, numRecommendations = 2))

  (storedMustEqual(u1Expected)) and
    (storedMustEqual(u2Expected)) and
    (storedMustEqual(u3Expected))
}

"correctly writes ItemRecScores with subset itemsIndex.tsv" in {
  // Same fixtures as above, except that the items index omits i2.
  val subsetDir = "/tmp/pio_test/subset/"
  new File(subsetDir).mkdirs()
  val subsetItemsIndex = List(
    "1\ti1\tt1,t2",
    "3\ti3\tt2,t3",
    "4\ti4\tt3"
  )
  writeToFile(usersIndex, s"${subsetDir}usersIndex.tsv")
  writeToFile(subsetItemsIndex, s"${subsetDir}itemsIndex.tsv")
  writeToFile(seenCSV, s"${subsetDir}seen.csv")
  writeToFile(ratingsUMM, s"${subsetDir}ratings.mm_U.mm")
  writeToFile(ratingsVMM, s"${subsetDir}ratings.mm_V.mm")

  val algoid = 27
  val modelSet = false
  implicit val algo: Algo = algoFixture(algoid, modelSet)

  val u1Expected = expectedScore("u1", algoid, modelSet,
    Seq("i4" -> 10.88, "i1" -> 8.97, "i3" -> 8.71))
  val u2Expected = expectedScore("u2", algoid, modelSet,
    Seq("i3" -> 7.67, "i4" -> 6.76, "i1" -> 6.69))
  val u3Expected = expectedScore("u3", algoid, modelSet,
    Seq("i4" -> 6.89, "i3" -> 5.98, "i1" -> 5.91))

  GraphChiModelConstructor.main(
    constructorArgs(subsetDir, algoid, modelSet, unseenOnly = false, numRecommendations = 5))

  (storedMustEqual(u1Expected)) and
    (storedMustEqual(u2Expected)) and
    (storedMustEqual(u3Expected))
}

"correctly writes ItemRecScores with unseenOnly=true" in {
  val algoid = 28
  val modelSet = true
  implicit val algo: Algo = algoFixture(algoid, modelSet)

  // Items listed in seenCSV for a user must not be recommended to that user.
  val u1Expected = expectedScore("u1", algoid, modelSet,
    Seq("i4" -> 10.88))
  val u2Expected = expectedScore("u2", algoid, modelSet,
    Seq("i2" -> 8.76, "i3" -> 7.67))
  val u3Expected = expectedScore("u3", algoid, modelSet,
    Seq("i4" -> 6.89, "i3" -> 5.98, "i1" -> 5.91))

  GraphChiModelConstructor.main(
    constructorArgs(inputDir, algoid, modelSet, unseenOnly = true, numRecommendations = 4))

  (storedMustEqual(u1Expected)) and
    (storedMustEqual(u2Expected)) and
    (storedMustEqual(u3Expected))
}
}
// TODO: test evalid != None
// Clean up once the tests have finished (cleanUp() drops the test database).
step(cleanUp())
}