// blob: 6aedd75037f5404c02ebb16874d9bedd1fd0bd16 [file] [log] [blame]
package io.prediction.algorithms.generic.itemrec
import io.prediction.commons.Config
import io.prediction.commons.appdata.{ User, Item, U2IAction }
import org.apache.commons.io.FileUtils
import java.io.File
import org.specs2.mutable._
import com.github.nscala_time.time.Imports._
import scala.io.Source
import com.mongodb.casbah.Imports._
/**
 * Integration spec for [[GenericDataPreparator]].
 *
 * While the spec is being constructed it inserts three users, six items and a
 * set of user-to-item actions through the appdata stores obtained from
 * `Config`, then runs `GenericDataPreparator.main` once per group of examples
 * (matrixMarket = true, matrixMarket = false, and matrixMarket = false with
 * explicit seenActions). Each example reads one generated file and compares
 * its lines, ignoring order, with the expected content.
 *
 * The final `step(cleanUp())` drops the test Mongo database and deletes the
 * output directory.
 */
class GenericDataPreparatorSpec extends Specification {

  val parentDir = "/tmp/pio_test/io.prediction.algorithms.generic.itemrec.GenericDataPreparatorSpec"

  // note: should match the db name defined in the application.conf
  val mongoDbName = "predictionio_appdata_generic_dataprep_test"

  /** Drops the test database and removes every file written under `parentDir`. */
  def cleanUp(): Unit = {
    // remove the test database
    MongoConnection()(mongoDbName).dropDatabase()
    FileUtils.deleteDirectory(new File(parentDir))
  }

  /**
   * Reads every line of a generated output file and closes the file afterwards.
   *
   * The path is `outputDir` immediately followed by `fileName`, with no
   * separator inserted, exactly as the examples have always built it.
   * NOTE(review): the output directories used below do not end with "/", so
   * this resolves to e.g. ".../mmtrueusersIndex.tsv" -- confirm this matches
   * the location GenericDataPreparator writes to.
   *
   * @param outputDir the value passed to the preparator as `--outputDir`
   * @param fileName  name of the generated file
   * @return the lines of the file, in file order
   */
  private def outputLines(outputDir: String, fileName: String): List[String] = {
    val source = Source.fromFile(s"${outputDir}${fileName}")
    // Materialise the lines before closing: getLines() is lazy.
    try source.getLines().toList finally source.close()
  }

  /**
   * Flattens an option map into a command-line argument array.
   *
   * An `Array[String]` value becomes `--key v1 v2 ...`; any other value
   * becomes `--key value.toString`.
   *
   * @param options option name (without the leading dashes) to option value
   * @return the arguments to hand to `GenericDataPreparator.main`
   */
  private def toCliArgs(options: Map[String, Any]): Array[String] =
    options.toArray.flatMap {
      case (k, v) =>
        v match {
          case x: Array[String] => Array(s"--${k}") ++ x
          case _ => Array(s"--${k}", v.toString)
        }
    }

  val commonConfig = new Config
  val appdataUsers = commonConfig.getAppdataUsers
  val appdataItems = commonConfig.getAppdataItems
  val appdataU2IActions = commonConfig.getAppdataU2IActions

  val appid = 23

  // insert a few users into db
  val user = User(
    id = "u1",
    appid = appid,
    ct = DateTime.now,
    latlng = None,
    inactive = None,
    attributes = None)

  appdataUsers.insert(user.copy(id = "u1"))
  appdataUsers.insert(user.copy(id = "u2"))
  appdataUsers.insert(user.copy(id = "u3"))

  // insert a few items into db
  val itemStartTime = DateTime.now
  val itemStartTimeMillis = itemStartTime.millis

  val item = Item(
    id = "i1",
    appid = appid,
    ct = DateTime.now,
    itypes = List("t1", "t2"),
    starttime = Some(itemStartTime),
    endtime = None,
    price = None,
    profit = None,
    latlng = None,
    inactive = None,
    attributes = None)

  // i3 and i5 are inserted with inactive = Some(true); the other items are
  // either explicitly active (Some(false)) or leave the flag unset (None).
  appdataItems.insert(item.copy(id = "i1", itypes = List("t1", "t2"),
    inactive = Some(false)))
  appdataItems.insert(item.copy(id = "i2", itypes = List("t1")))
  appdataItems.insert(item.copy(id = "i3", itypes = List("t2", "t3"),
    inactive = Some(true)))
  appdataItems.insert(item.copy(id = "i4", itypes = List("t3")))
  appdataItems.insert(item.copy(id = "i5", itypes = List("t1"),
    inactive = Some(true)))
  appdataItems.insert(item.copy(id = "i6", itypes = List("t3"),
    inactive = Some(false)))

  // insert a few u2i actions into db
  // NOTE(review): every action below is a copy of this template and therefore
  // carries the same timestamp `t`; how conflictParam = "latest" orders them is
  // decided inside GenericDataPreparator -- confirm there.
  val u2i = U2IAction(
    appid = appid,
    action = "rate",
    uid = "u0",
    iid = "i0",
    t = DateTime.now,
    latlng = None,
    v = Some(3),
    price = None)

  // test mixed and conflicting actions
  appdataU2IActions.insert(u2i.copy(uid = "u1", iid = "i1", action = "view"))
  appdataU2IActions.insert(u2i.copy(uid = "u1", iid = "i1", action = "rate", v = Some(3)))
  appdataU2IActions.insert(u2i.copy(uid = "u1", iid = "i2", action = "rate", v = Some(4)))
  appdataU2IActions.insert(u2i.copy(uid = "u1", iid = "i3", action = "rate", v = Some(1)))

  appdataU2IActions.insert(u2i.copy(uid = "u2", iid = "i1", action = "view"))
  appdataU2IActions.insert(u2i.copy(uid = "u2", iid = "i1", action = "rate", v = Some(2)))
  appdataU2IActions.insert(u2i.copy(uid = "u2", iid = "i2", action = "view"))
  appdataU2IActions.insert(u2i.copy(uid = "u2", iid = "i2", action = "rate", v = Some(5)))
  appdataU2IActions.insert(u2i.copy(uid = "u2", iid = "i2", action = "rate", v = Some(1)))
  appdataU2IActions.insert(u2i.copy(uid = "u2", iid = "i4", action = "dislike"))
  appdataU2IActions.insert(u2i.copy(uid = "u2", iid = "i4", action = "rate", v = Some(3)))

  appdataU2IActions.insert(u2i.copy(uid = "u3", iid = "i2", action = "like"))
  appdataU2IActions.insert(u2i.copy(uid = "u3", iid = "i2", action = "rate", v = Some(5)))
  appdataU2IActions.insert(u2i.copy(uid = "u3", iid = "i3", action = "view"))
  appdataU2IActions.insert(u2i.copy(uid = "u3", iid = "i3", action = "view"))
  appdataU2IActions.insert(u2i.copy(uid = "u3", iid = "i3", action = "rate", v = Some(1)))
  appdataU2IActions.insert(u2i.copy(uid = "u3", iid = "i4", action = "view"))
  appdataU2IActions.insert(u2i.copy(uid = "u3", iid = "i4", action = "rate", v = Some(4)))

  // actions of unknown users and on unknown items (absent from the users and
  // items appdata); none of them appears in any expected output below
  appdataU2IActions.insert(u2i.copy(uid = "u3", iid = "unkowni5", action = "view"))
  appdataU2IActions.insert(u2i.copy(uid = "u3", iid = "unkowni6", action = "rate", v = Some(4)))
  appdataU2IActions.insert(u2i.copy(uid = "unkownu4", iid = "i2", action = "view"))
  appdataU2IActions.insert(u2i.copy(uid = "unkownu4", iid = "i1", action = "rate", v = Some(3)))

  // Expected content shared by the example groups below.

  // usersIndex.tsv: <index>\t<user id>
  private val expectedUsersIndex = List(
    "1\tu1",
    "2\tu2",
    "3\tu3")

  // itemsIndex.tsv: <index>\t<item id>\t<itypes>\t<start time millis>
  // Rows 3 and 5 are absent: i3 and i5 were inserted as inactive.
  private val expectedItemsIndex = List(
    s"1\ti1\tt1,t2\t${itemStartTimeMillis}",
    s"2\ti2\tt1\t${itemStartTimeMillis}",
    //s"3\ti3\tt2,t3\t${itemStartTimeMillis}", inactive
    s"4\ti4\tt3\t${itemStartTimeMillis}",
    s"6\ti6\tt3\t${itemStartTimeMillis}"
  )

  // ratings.csv: <user index>,<item index>,<rating>
  private val expectedRatingsCsv = List(
    "1,1,3",
    "1,2,4",
    "1,3,1",
    "2,1,2",
    "2,2,1",
    "2,4,3",
    "3,2,5",
    "3,3,1",
    "3,4,4"
  )

  // seen.csv when no seenActions option is given: <user index>,<item index>
  private val expectedSeenDefault = List(
    "1,1",
    "1,2",
    "1,3",
    "2,1",
    "2,2",
    "2,4",
    "3,2",
    "3,3",
    "3,4"
  )

  "GenericDataPreparator with matrixMarket = true" should {

    val outputDir = s"${parentDir}/mmtrue"

    val args = Map(
      "outputDir" -> outputDir,
      "appid" -> appid,
      "viewParam" -> 4,
      "likeParam" -> 3,
      "dislikeParam" -> 1,
      "conversionParam" -> 2,
      "conflictParam" -> "latest",
      "matrixMarket" -> true
    )

    GenericDataPreparator.main(toCliArgs(args))

    "correctly generate usersIndex.tsv" in {
      val usersIndex = outputLines(outputDir, "usersIndex.tsv")

      usersIndex must containTheSameElementsAs(expectedUsersIndex)
    }

    "correctly generate itemsIndex.tsv" in {
      val itemsIndex = outputLines(outputDir, "itemsIndex.tsv")

      itemsIndex must containTheSameElementsAs(expectedItemsIndex)
    }

    "correctly generate ratings.mm" in {
      val ratingsLines = outputLines(outputDir, "ratings.mm")

      // The first two lines are the header; the remaining lines are the entries.
      val headers = ratingsLines.take(2)
      val ratings = ratingsLines.drop(2)

      val expectedHeaders = List(
        "%%MatrixMarket matrix coordinate real general",
        "3 6 9"
      )

      val expected = List(
        "1 1 3",
        "1 2 4",
        "1 3 1",
        "2 1 2",
        "2 2 1",
        "2 4 3",
        "3 2 5",
        "3 3 1",
        "3 4 4"
      )

      headers must be_==(expectedHeaders) and
        (ratings must containTheSameElementsAs(expected))
    }

    "correctly write seen.csv" in {
      val seen = outputLines(outputDir, "seen.csv")

      seen must containTheSameElementsAs(expectedSeenDefault)
    }
  }

  "GenericDataPreparator with matrixMarket = false" should {

    val outputDir = s"${parentDir}/mmfalse"

    val args = Map(
      "outputDir" -> outputDir,
      "appid" -> appid,
      "viewParam" -> 4,
      "likeParam" -> 3,
      "dislikeParam" -> 1,
      "conversionParam" -> 2,
      "conflictParam" -> "latest",
      "matrixMarket" -> false
    )

    GenericDataPreparator.main(toCliArgs(args))

    "correctly generate usersIndex.tsv" in {
      val usersIndex = outputLines(outputDir, "usersIndex.tsv")

      usersIndex must containTheSameElementsAs(expectedUsersIndex)
    }

    "correctly generate itemsIndex.tsv" in {
      val itemsIndex = outputLines(outputDir, "itemsIndex.tsv")

      itemsIndex must containTheSameElementsAs(expectedItemsIndex)
    }

    "correctly generate ratings.csv" in {
      val ratings = outputLines(outputDir, "ratings.csv")

      ratings must containTheSameElementsAs(expectedRatingsCsv)
    }

    "correctly write seen.csv" in {
      val seen = outputLines(outputDir, "seen.csv")

      seen must containTheSameElementsAs(expectedSeenDefault)
    }
  }

  "GenericDataPreparator with matrixMarket = false and seenAction" should {

    val outputDir = s"${parentDir}/seenActions"

    val args = Map(
      "outputDir" -> outputDir,
      "appid" -> appid,
      "viewParam" -> 4,
      "likeParam" -> 3,
      "dislikeParam" -> 1,
      "conversionParam" -> 2,
      "conflictParam" -> "latest",
      "seenActions" -> Array("view", "like"),
      "matrixMarket" -> false
    )

    GenericDataPreparator.main(toCliArgs(args))

    "correctly generate usersIndex.tsv" in {
      val usersIndex = outputLines(outputDir, "usersIndex.tsv")

      usersIndex must containTheSameElementsAs(expectedUsersIndex)
    }

    "correctly generate itemsIndex.tsv" in {
      val itemsIndex = outputLines(outputDir, "itemsIndex.tsv")

      itemsIndex must containTheSameElementsAs(expectedItemsIndex)
    }

    "correctly generate ratings.csv" in {
      val ratings = outputLines(outputDir, "ratings.csv")

      ratings must containTheSameElementsAs(expectedRatingsCsv)
    }

    "correctly write seen.csv" in {
      val seen = outputLines(outputDir, "seen.csv")

      // Only the user/item pairs that have a "view" or "like" action among the
      // known users and items inserted above.
      val expected = List(
        "1,1",
        "2,1",
        "2,2",
        "3,2",
        "3,3",
        "3,4"
      )

      seen must containTheSameElementsAs(expected)
    }
  }

  // TODO: test start and end time
  // TODO: test evalid != None

  // clean up once all examples have finished
  step(cleanUp())
}