Merge branch 'release-0.6.8'
Conflicts:
bin/common.sh
build.sbt
dist/bin/backup
dist/bin/common.sh
dist/bin/restore
dist/bin/updatecheck
dist/bin/upgrade
dist/conf/init.json
servers/admin/build.sbt
servers/api/build.sbt
servers/scheduler/build.sbt
servers/scheduler/conf/application.conf
tools/softwaremanager/src/main/scala/io/prediction/tools/softwaremanager/Backup.scala
tools/softwaremanager/src/main/scala/io/prediction/tools/softwaremanager/Restore.scala
tools/softwaremanager/src/main/scala/io/prediction/tools/softwaremanager/UpdateCheck.scala
tools/softwaremanager/src/main/scala/io/prediction/tools/softwaremanager/Upgrade.scala
diff --git a/LICENSE b/LICENSE
index 658e287..0c8aba7 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
PredictionIO
A prediction server for software developers
-Copyright (C) 2013 TappingStone, Inc.
+Copyright (C) 2012-2014 TappingStone, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
diff --git a/bin/common.sh b/bin/common.sh
index bc123c0..695770b 100644
--- a/bin/common.sh
+++ b/bin/common.sh
@@ -2,7 +2,7 @@
# This script should be sourced with $BASE set to the base of the repository
-VERSION=0.6.7
+VERSION=0.6.8
# Play framework related
PLAY_OPTS=
diff --git a/build.sbt b/build.sbt
index 6bd2d94..5c9de39 100644
--- a/build.sbt
+++ b/build.sbt
@@ -4,7 +4,7 @@
name := "predictionio"
-version in ThisBuild := "0.6.7"
+version in ThisBuild := "0.6.8"
organization in ThisBuild := "io.prediction"
diff --git a/commons/src/main/scala/io/prediction/commons/settings/Engines.scala b/commons/src/main/scala/io/prediction/commons/settings/Engines.scala
index 57b9ddd..72a4c7c 100644
--- a/commons/src/main/scala/io/prediction/commons/settings/Engines.scala
+++ b/commons/src/main/scala/io/prediction/commons/settings/Engines.scala
@@ -14,6 +14,8 @@
* @param infoid EngineInfo ID.
* @param itypes List of item types.
* @param params Engine parameters as key-value pairs.
+ * @param trainingdisabled Whether training is disabled or not. If value is undefined, assume training is not disabled.
+ * @param trainingschedule Training schedule of this engine in cron expression. Default to an hourly schedule at 0 minute.
*/
case class Engine(
id: Int,
@@ -21,7 +23,9 @@
name: String,
infoid: String,
itypes: Option[Seq[String]],
- params: Map[String, Any])
+ params: Map[String, Any],
+ trainingdisabled: Option[Boolean] = None,
+ trainingschedule: Option[String] = None)
/** Base trait for implementations that interact with engines in the backend data store. */
trait Engines extends Common {
@@ -69,7 +73,7 @@
}
}
-/** json4s serializer for the Algo class. */
+/** json4s serializer for the Engine class. */
class EngineSerializer extends CustomSerializer[Engine](format => (
{
case x: JObject =>
@@ -80,7 +84,9 @@
name = (x \ "name").extract[String],
infoid = (x \ "infoid").extract[String],
itypes = (x \ "itypes").extract[Option[Seq[String]]],
- params = Common.sanitize((x \ "params").asInstanceOf[JObject].values))
+ params = Common.sanitize((x \ "params").asInstanceOf[JObject].values),
+ trainingdisabled = (x \ "trainingdisabled").extract[Option[Boolean]],
+ trainingschedule = (x \ "trainingschedule").extract[Option[String]])
},
{
case x: Engine =>
@@ -91,6 +97,8 @@
JField("name", Extraction.decompose(x.name)) ::
JField("infoid", Extraction.decompose(x.infoid)) ::
JField("itypes", Extraction.decompose(x.itypes)) ::
- JField("params", Extraction.decompose(x.params)) :: Nil)
+ JField("params", Extraction.decompose(x.params)) ::
+ JField("trainingdisabled", Extraction.decompose(x.trainingdisabled)) ::
+ JField("trainingschedule", Extraction.decompose(x.trainingschedule)) :: Nil)
})
)
diff --git a/commons/src/main/scala/io/prediction/commons/settings/mongodb/MongoEngines.scala b/commons/src/main/scala/io/prediction/commons/settings/mongodb/MongoEngines.scala
index cb77980..436b0e4 100644
--- a/commons/src/main/scala/io/prediction/commons/settings/mongodb/MongoEngines.scala
+++ b/commons/src/main/scala/io/prediction/commons/settings/mongodb/MongoEngines.scala
@@ -9,7 +9,6 @@
class MongoEngines(db: MongoDB) extends Engines {
private val engineColl = db("engines")
private val seq = new MongoSequences(db)
- private val getFields = MongoDBObject("appid" -> 1, "name" -> 1, "infoid" -> 1, "itypes" -> 1, "params" -> 1)
private def dbObjToEngine(dbObj: DBObject) = {
/** Transparent upgrade. Remove in next minor version. */
@@ -20,7 +19,9 @@
name = dbObj.as[String]("name"),
infoid = dbObj.as[String]("infoid"),
itypes = dbObj.getAs[MongoDBList]("itypes") map { MongoUtils.mongoDbListToListOfString(_) },
- params = MongoUtils.dbObjToMap(settings))
+ params = MongoUtils.dbObjToMap(settings),
+ trainingdisabled = dbObj.getAs[Boolean]("trainingdisabled"),
+ trainingschedule = dbObj.getAs[String]("trainingschedule"))
update(e)
e
} getOrElse {
@@ -30,7 +31,9 @@
name = dbObj.as[String]("name"),
infoid = dbObj.as[String]("infoid"),
itypes = dbObj.getAs[MongoDBList]("itypes") map { MongoUtils.mongoDbListToListOfString(_) },
- params = MongoUtils.dbObjToMap(dbObj.as[DBObject]("params")))
+ params = MongoUtils.dbObjToMap(dbObj.as[DBObject]("params")),
+ trainingdisabled = dbObj.getAs[Boolean]("trainingdisabled"),
+ trainingschedule = dbObj.getAs[String]("trainingschedule"))
}
}
@@ -52,14 +55,16 @@
)
// optional fields
- val optObj = engine.itypes.map(x => MongoDBObject("itypes" -> x)).getOrElse(MongoUtils.emptyObj)
+ val optObj = engine.itypes.map(x => MongoDBObject("itypes" -> x)).getOrElse(MongoUtils.emptyObj) ++
+ engine.trainingdisabled.map(x => MongoDBObject("trainingdisabled" -> x)).getOrElse(MongoUtils.emptyObj) ++
+ engine.trainingschedule.map(x => MongoDBObject("trainingschedule" -> x)).getOrElse(MongoUtils.emptyObj)
engineColl.insert(obj ++ optObj)
id
}
- def get(id: Int) = engineColl.findOne(MongoDBObject("_id" -> id), getFields) map { dbObjToEngine(_) }
+ def get(id: Int) = engineColl.findOne(MongoDBObject("_id" -> id)) map { dbObjToEngine(_) }
def getAll() = new MongoEngineIterator(engineColl.find())
@@ -76,10 +81,12 @@
val infoidObj = MongoDBObject("infoid" -> engine.infoid)
val itypesObj = engine.itypes.map(x => MongoDBObject("itypes" -> x)).getOrElse(MongoUtils.emptyObj)
val paramsObj = MongoDBObject("params" -> engine.params)
+ val trainingdisabledObj = engine.trainingdisabled.map(x => MongoDBObject("trainingdisabled" -> x)).getOrElse(MongoUtils.emptyObj)
+ val trainingscheduleObj = engine.trainingschedule.map(x => MongoDBObject("trainingschedule" -> x)).getOrElse(MongoUtils.emptyObj)
engineColl.update(
idObj,
- idObj ++ appidObj ++ nameObj ++ infoidObj ++ itypesObj ++ paramsObj,
+ idObj ++ appidObj ++ nameObj ++ infoidObj ++ itypesObj ++ paramsObj ++ trainingdisabledObj ++ trainingscheduleObj,
upsert
)
}
diff --git a/dist/bin/backup b/dist/bin/backup
index b8b418a..a2e346c 100755
--- a/dist/bin/backup
+++ b/dist/bin/backup
@@ -302,7 +302,7 @@
declare -r lib_dir="$(realpath "${app_home}/../lib")"
declare -r app_mainclass="io.prediction.tools.softwaremanager.Backup"
-declare -r app_classpath="$lib_dir/com.github.nscala-time.nscala-time_2.10-0.6.0.jar:$lib_dir/com.github.scopt.scopt_2.10-3.1.0.jar:$lib_dir/com.thoughtworks.paranamer.paranamer-2.6.jar:$lib_dir/com.typesafe.config-1.0.2.jar:$lib_dir/commons-io.commons-io-2.4.jar:$lib_dir/io.prediction.predictionio-commons-0.6.7.jar:$lib_dir/io.prediction.softwaremanager-0.6.7.jar:$lib_dir/joda-time.joda-time-2.3.jar:$lib_dir/org.joda.joda-convert-1.5.jar:$lib_dir/org.json4s.json4s-ast_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-core_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-ext_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-native_2.10-3.2.6.jar:$lib_dir/org.mongodb.casbah-commons_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-core_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-gridfs_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-query_2.10-2.6.2.jar:$lib_dir/org.mongodb.mongo-java-driver-2.11.2.jar:$lib_dir/org.scala-lang.scala-compiler-2.10.0.jar:$lib_dir/org.scala-lang.scala-library-2.10.2.jar:$lib_dir/org.scala-lang.scala-reflect-2.10.0.jar:$lib_dir/org.scala-lang.scalap-2.10.0.jar:$lib_dir/org.slf4j.slf4j-api-1.6.0.jar:$lib_dir/org.slf4j.slf4j-nop-1.6.0.jar"
+declare -r app_classpath="$lib_dir/com.github.nscala-time.nscala-time_2.10-0.6.0.jar:$lib_dir/com.github.scopt.scopt_2.10-3.1.0.jar:$lib_dir/com.thoughtworks.paranamer.paranamer-2.6.jar:$lib_dir/com.typesafe.config-1.0.2.jar:$lib_dir/commons-io.commons-io-2.4.jar:$lib_dir/io.prediction.predictionio-commons-0.6.8.jar:$lib_dir/io.prediction.softwaremanager-0.6.8.jar:$lib_dir/joda-time.joda-time-2.3.jar:$lib_dir/org.joda.joda-convert-1.5.jar:$lib_dir/org.json4s.json4s-ast_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-core_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-ext_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-native_2.10-3.2.6.jar:$lib_dir/org.mongodb.casbah-commons_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-core_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-gridfs_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-query_2.10-2.6.2.jar:$lib_dir/org.mongodb.mongo-java-driver-2.11.2.jar:$lib_dir/org.scala-lang.scala-compiler-2.10.0.jar:$lib_dir/org.scala-lang.scala-library-2.10.2.jar:$lib_dir/org.scala-lang.scala-reflect-2.10.0.jar:$lib_dir/org.scala-lang.scalap-2.10.0.jar:$lib_dir/org.slf4j.slf4j-api-1.6.0.jar:$lib_dir/org.slf4j.slf4j-nop-1.6.0.jar"
addJava "-Dconfig.file=${app_home}/../conf/predictionio.conf -Dio.prediction.base=${app_home}/.."
declare -r java_cmd=$(get_java_cmd)
diff --git a/dist/bin/common.sh b/dist/bin/common.sh
index 151d8df..d22c3de 100644
--- a/dist/bin/common.sh
+++ b/dist/bin/common.sh
@@ -2,7 +2,7 @@
# This script should be sourced with $BASE set to the base of the repository
-VERSION=0.6.7
+VERSION=0.6.8
# Play framework related
PLAY_OPTS=""
diff --git a/dist/bin/restore b/dist/bin/restore
index 24c6afe..aaed7c2 100755
--- a/dist/bin/restore
+++ b/dist/bin/restore
@@ -302,7 +302,7 @@
declare -r lib_dir="$(realpath "${app_home}/../lib")"
declare -r app_mainclass="io.prediction.tools.softwaremanager.Restore"
-declare -r app_classpath="$lib_dir/com.github.nscala-time.nscala-time_2.10-0.6.0.jar:$lib_dir/com.github.scopt.scopt_2.10-3.1.0.jar:$lib_dir/com.thoughtworks.paranamer.paranamer-2.6.jar:$lib_dir/com.typesafe.config-1.0.2.jar:$lib_dir/commons-io.commons-io-2.4.jar:$lib_dir/io.prediction.predictionio-commons-0.6.7.jar:$lib_dir/io.prediction.softwaremanager-0.6.7.jar:$lib_dir/joda-time.joda-time-2.3.jar:$lib_dir/org.joda.joda-convert-1.5.jar:$lib_dir/org.json4s.json4s-ast_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-core_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-ext_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-native_2.10-3.2.6.jar:$lib_dir/org.mongodb.casbah-commons_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-core_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-gridfs_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-query_2.10-2.6.2.jar:$lib_dir/org.mongodb.mongo-java-driver-2.11.2.jar:$lib_dir/org.scala-lang.scala-compiler-2.10.0.jar:$lib_dir/org.scala-lang.scala-library-2.10.2.jar:$lib_dir/org.scala-lang.scala-reflect-2.10.0.jar:$lib_dir/org.scala-lang.scalap-2.10.0.jar:$lib_dir/org.slf4j.slf4j-api-1.6.0.jar:$lib_dir/org.slf4j.slf4j-nop-1.6.0.jar"
+declare -r app_classpath="$lib_dir/com.github.nscala-time.nscala-time_2.10-0.6.0.jar:$lib_dir/com.github.scopt.scopt_2.10-3.1.0.jar:$lib_dir/com.thoughtworks.paranamer.paranamer-2.6.jar:$lib_dir/com.typesafe.config-1.0.2.jar:$lib_dir/commons-io.commons-io-2.4.jar:$lib_dir/io.prediction.predictionio-commons-0.6.8.jar:$lib_dir/io.prediction.softwaremanager-0.6.8.jar:$lib_dir/joda-time.joda-time-2.3.jar:$lib_dir/org.joda.joda-convert-1.5.jar:$lib_dir/org.json4s.json4s-ast_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-core_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-ext_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-native_2.10-3.2.6.jar:$lib_dir/org.mongodb.casbah-commons_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-core_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-gridfs_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-query_2.10-2.6.2.jar:$lib_dir/org.mongodb.mongo-java-driver-2.11.2.jar:$lib_dir/org.scala-lang.scala-compiler-2.10.0.jar:$lib_dir/org.scala-lang.scala-library-2.10.2.jar:$lib_dir/org.scala-lang.scala-reflect-2.10.0.jar:$lib_dir/org.scala-lang.scalap-2.10.0.jar:$lib_dir/org.slf4j.slf4j-api-1.6.0.jar:$lib_dir/org.slf4j.slf4j-nop-1.6.0.jar"
addJava "-Dconfig.file=${app_home}/../conf/predictionio.conf -Dio.prediction.base=${app_home}/.."
declare -r java_cmd=$(get_java_cmd)
diff --git a/dist/bin/updatecheck b/dist/bin/updatecheck
index 8f3db7c..5ac4e5d 100755
--- a/dist/bin/updatecheck
+++ b/dist/bin/updatecheck
@@ -302,7 +302,7 @@
declare -r lib_dir="$(realpath "${app_home}/../lib")"
declare -r app_mainclass="io.prediction.tools.softwaremanager.UpdateCheck"
-declare -r app_classpath="$lib_dir/com.github.nscala-time.nscala-time_2.10-0.6.0.jar:$lib_dir/com.github.scopt.scopt_2.10-3.1.0.jar:$lib_dir/com.thoughtworks.paranamer.paranamer-2.6.jar:$lib_dir/com.typesafe.config-1.0.2.jar:$lib_dir/commons-io.commons-io-2.4.jar:$lib_dir/io.prediction.predictionio-commons-0.6.7.jar:$lib_dir/io.prediction.softwaremanager-0.6.7.jar:$lib_dir/joda-time.joda-time-2.3.jar:$lib_dir/org.joda.joda-convert-1.5.jar:$lib_dir/org.json4s.json4s-ast_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-core_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-ext_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-native_2.10-3.2.6.jar:$lib_dir/org.mongodb.casbah-commons_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-core_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-gridfs_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-query_2.10-2.6.2.jar:$lib_dir/org.mongodb.mongo-java-driver-2.11.2.jar:$lib_dir/org.scala-lang.scala-compiler-2.10.0.jar:$lib_dir/org.scala-lang.scala-library-2.10.2.jar:$lib_dir/org.scala-lang.scala-reflect-2.10.0.jar:$lib_dir/org.scala-lang.scalap-2.10.0.jar:$lib_dir/org.slf4j.slf4j-api-1.6.0.jar:$lib_dir/org.slf4j.slf4j-nop-1.6.0.jar"
+declare -r app_classpath="$lib_dir/com.github.nscala-time.nscala-time_2.10-0.6.0.jar:$lib_dir/com.github.scopt.scopt_2.10-3.1.0.jar:$lib_dir/com.thoughtworks.paranamer.paranamer-2.6.jar:$lib_dir/com.typesafe.config-1.0.2.jar:$lib_dir/commons-io.commons-io-2.4.jar:$lib_dir/io.prediction.predictionio-commons-0.6.8.jar:$lib_dir/io.prediction.softwaremanager-0.6.8.jar:$lib_dir/joda-time.joda-time-2.3.jar:$lib_dir/org.joda.joda-convert-1.5.jar:$lib_dir/org.json4s.json4s-ast_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-core_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-ext_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-native_2.10-3.2.6.jar:$lib_dir/org.mongodb.casbah-commons_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-core_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-gridfs_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-query_2.10-2.6.2.jar:$lib_dir/org.mongodb.mongo-java-driver-2.11.2.jar:$lib_dir/org.scala-lang.scala-compiler-2.10.0.jar:$lib_dir/org.scala-lang.scala-library-2.10.2.jar:$lib_dir/org.scala-lang.scala-reflect-2.10.0.jar:$lib_dir/org.scala-lang.scalap-2.10.0.jar:$lib_dir/org.slf4j.slf4j-api-1.6.0.jar:$lib_dir/org.slf4j.slf4j-nop-1.6.0.jar"
addJava "-Dconfig.file=${app_home}/../conf/predictionio.conf -Dio.prediction.base=${app_home}/.."
declare -r java_cmd=$(get_java_cmd)
diff --git a/dist/bin/upgrade b/dist/bin/upgrade
index 6e38675..2ca33ca 100755
--- a/dist/bin/upgrade
+++ b/dist/bin/upgrade
@@ -302,7 +302,7 @@
declare -r lib_dir="$(realpath "${app_home}/../lib")"
declare -r app_mainclass="io.prediction.tools.softwaremanager.Upgrade"
-declare -r app_classpath="$lib_dir/com.github.nscala-time.nscala-time_2.10-0.6.0.jar:$lib_dir/com.github.scopt.scopt_2.10-3.1.0.jar:$lib_dir/com.thoughtworks.paranamer.paranamer-2.6.jar:$lib_dir/com.typesafe.config-1.0.2.jar:$lib_dir/commons-io.commons-io-2.4.jar:$lib_dir/io.prediction.predictionio-commons-0.6.7.jar:$lib_dir/io.prediction.softwaremanager-0.6.7.jar:$lib_dir/joda-time.joda-time-2.3.jar:$lib_dir/org.joda.joda-convert-1.5.jar:$lib_dir/org.json4s.json4s-ast_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-core_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-ext_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-native_2.10-3.2.6.jar:$lib_dir/org.mongodb.casbah-commons_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-core_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-gridfs_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-query_2.10-2.6.2.jar:$lib_dir/org.mongodb.mongo-java-driver-2.11.2.jar:$lib_dir/org.scala-lang.scala-compiler-2.10.0.jar:$lib_dir/org.scala-lang.scala-library-2.10.2.jar:$lib_dir/org.scala-lang.scala-reflect-2.10.0.jar:$lib_dir/org.scala-lang.scalap-2.10.0.jar:$lib_dir/org.slf4j.slf4j-api-1.6.0.jar:$lib_dir/org.slf4j.slf4j-nop-1.6.0.jar"
+declare -r app_classpath="$lib_dir/com.github.nscala-time.nscala-time_2.10-0.6.0.jar:$lib_dir/com.github.scopt.scopt_2.10-3.1.0.jar:$lib_dir/com.thoughtworks.paranamer.paranamer-2.6.jar:$lib_dir/com.typesafe.config-1.0.2.jar:$lib_dir/commons-io.commons-io-2.4.jar:$lib_dir/io.prediction.predictionio-commons-0.6.8.jar:$lib_dir/io.prediction.softwaremanager-0.6.8.jar:$lib_dir/joda-time.joda-time-2.3.jar:$lib_dir/org.joda.joda-convert-1.5.jar:$lib_dir/org.json4s.json4s-ast_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-core_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-ext_2.10-3.2.6.jar:$lib_dir/org.json4s.json4s-native_2.10-3.2.6.jar:$lib_dir/org.mongodb.casbah-commons_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-core_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-gridfs_2.10-2.6.2.jar:$lib_dir/org.mongodb.casbah-query_2.10-2.6.2.jar:$lib_dir/org.mongodb.mongo-java-driver-2.11.2.jar:$lib_dir/org.scala-lang.scala-compiler-2.10.0.jar:$lib_dir/org.scala-lang.scala-library-2.10.2.jar:$lib_dir/org.scala-lang.scala-reflect-2.10.0.jar:$lib_dir/org.scala-lang.scalap-2.10.0.jar:$lib_dir/org.slf4j.slf4j-api-1.6.0.jar:$lib_dir/org.slf4j.slf4j-nop-1.6.0.jar"
addJava "-Dconfig.file=${app_home}/../conf/predictionio.conf -Dio.prediction.base=${app_home}/.."
declare -r java_cmd=$(get_java_cmd)
diff --git a/dist/conf/init.json b/dist/conf/init.json
index a4d068f..74041f0 100644
--- a/dist/conf/init.json
+++ b/dist/conf/init.json
@@ -1,38 +1,38 @@
{
"systeminfos": {
"version": {
- "value": "0.6.7",
+ "value": "0.6.8",
"description": "PredictionIO version"
},
"jars.pdioItemrecAlgo": {
- "value": "predictionio-process-hadoop-scalding-assembly-0.6.7.jar"
+ "value": "predictionio-process-hadoop-scalding-assembly-0.6.8.jar"
},
"jars.pdioItemsimAlgo": {
- "value": "predictionio-process-hadoop-scalding-assembly-0.6.7.jar"
+ "value": "predictionio-process-hadoop-scalding-assembly-0.6.8.jar"
},
"jars.mahoutItemrecAlgo": {
- "value": "predictionio-process-itemrec-algorithms-scala-mahout-assembly-0.6.7.jar"
+ "value": "predictionio-process-itemrec-algorithms-scala-mahout-assembly-0.6.8.jar"
},
"jars.pdioItemrecEval": {
- "value": "predictionio-process-hadoop-scalding-assembly-0.6.7.jar"
+ "value": "predictionio-process-hadoop-scalding-assembly-0.6.8.jar"
},
"jars.pdioItemsimEval": {
- "value": "predictionio-process-hadoop-scalding-assembly-0.6.7.jar"
+ "value": "predictionio-process-hadoop-scalding-assembly-0.6.8.jar"
},
"jars.pdioItemrecTopK": {
- "value": "predictionio-process-itemrec-evaluations-topkitems-assembly-0.6.7.jar"
+ "value": "predictionio-process-itemrec-evaluations-topkitems-assembly-0.6.8.jar"
},
"jars.pdioItemsimTopK": {
- "value": "predictionio-process-itemsim-evaluations-topkitems-assembly-0.6.7.jar"
+ "value": "predictionio-process-itemsim-evaluations-topkitems-assembly-0.6.8.jar"
},
"jars.pdioCommonsEval": {
- "value": "predictionio-process-hadoop-scalding-assembly-0.6.7.jar"
+ "value": "predictionio-process-hadoop-scalding-assembly-0.6.8.jar"
},
"jars.pdioCommonsParamGen": {
- "value": "predictionio-process-commons-evaluations-paramgen-assembly-0.6.7.jar"
+ "value": "predictionio-process-commons-evaluations-paramgen-assembly-0.6.8.jar"
},
"jars.pdioCommonsU2ITrainingTestSplit": {
- "value": "predictionio-process-commons-evaluations-scala-u2itrainingtestsplittime-assembly-0.6.7.jar"
+ "value": "predictionio-process-commons-evaluations-scala-u2itrainingtestsplittime-assembly-0.6.8.jar"
}
},
"engineinfos": {
@@ -296,10 +296,10 @@
"name": "Random Rank",
"description": "Predict user preferences randomly.",
"batchcommands": [
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.itemrec.randomrank.RandomRank --hdfs --training_dbType $appdataDbType$ --training_dbName $appdataDbName$ --training_dbHost $appdataDbHost$ --training_dbPort $appdataDbPort$ --modeldata_dbType $modeldataDbType$ --modeldata_dbName $modeldataDbName$ --modeldata_dbHost $modeldataDbHost$ --modeldata_dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --numRecommendations $numRecommendations$ --modelSet $modelset$"
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.itemrec.randomrank.RandomRank --hdfs --training_dbType $appdataDbType$ --training_dbName $appdataDbName$ --training_dbHost $appdataDbHost$ --training_dbPort $appdataDbPort$ --modeldata_dbType $modeldataDbType$ --modeldata_dbName $modeldataDbName$ --modeldata_dbHost $modeldataDbHost$ --modeldata_dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --numRecommendations $numRecommendations$ --modelSet $modelset$ --recommendationTime $recommendationTime$"
],
"offlineevalcommands": [
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.itemrec.randomrank.RandomRank --hdfs --training_dbType $appdataTrainingDbType$ --training_dbName $appdataTrainingDbName$ --training_dbHost $appdataTrainingDbHost$ --training_dbPort $appdataTrainingDbPort$ --modeldata_dbType $modeldataTrainingDbType$ --modeldata_dbName $modeldataTrainingDbName$ --modeldata_dbHost $modeldataTrainingDbHost$ --modeldata_dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --numRecommendations $numRecommendations$ --modelSet false --evalid $evalid$"
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.itemrec.randomrank.RandomRank --hdfs --training_dbType $appdataTrainingDbType$ --training_dbName $appdataTrainingDbName$ --training_dbHost $appdataTrainingDbHost$ --training_dbPort $appdataTrainingDbPort$ --modeldata_dbType $modeldataTrainingDbType$ --modeldata_dbName $modeldataTrainingDbName$ --modeldata_dbHost $modeldataTrainingDbHost$ --modeldata_dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --numRecommendations $numRecommendations$ --modelSet false --recommendationTime $recommendationTime$ --evalid $evalid$"
],
"paramorder": [],
"engineinfoid": "itemrec",
@@ -316,10 +316,10 @@
"name": "Latest Rank",
"description": "Recommend latest items to users.",
"batchcommands": [
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.itemrec.latestrank.LatestRank --hdfs --training_dbType $appdataDbType$ --training_dbName $appdataDbName$ --training_dbHost $appdataDbHost$ --training_dbPort $appdataDbPort$ --modeldata_dbType $modeldataDbType$ --modeldata_dbName $modeldataDbName$ --modeldata_dbHost $modeldataDbHost$ --modeldata_dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --numRecommendations $numRecommendations$ --modelSet $modelset$"
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.itemrec.latestrank.LatestRank --hdfs --training_dbType $appdataDbType$ --training_dbName $appdataDbName$ --training_dbHost $appdataDbHost$ --training_dbPort $appdataDbPort$ --modeldata_dbType $modeldataDbType$ --modeldata_dbName $modeldataDbName$ --modeldata_dbHost $modeldataDbHost$ --modeldata_dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --numRecommendations $numRecommendations$ --modelSet $modelset$ --recommendationTime $recommendationTime$"
],
"offlineevalcommands": [
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.itemrec.latestrank.LatestRank --hdfs --training_dbType $appdataTrainingDbType$ --training_dbName $appdataTrainingDbName$ --training_dbHost $appdataTrainingDbHost$ --training_dbPort $appdataTrainingDbPort$ --modeldata_dbType $modeldataTrainingDbType$ --modeldata_dbName $modeldataTrainingDbName$ --modeldata_dbHost $modeldataTrainingDbHost$ --modeldata_dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --numRecommendations $numRecommendations$ --modelSet false --evalid $evalid$"
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.itemrec.latestrank.LatestRank --hdfs --training_dbType $appdataTrainingDbType$ --training_dbName $appdataTrainingDbName$ --training_dbHost $appdataTrainingDbHost$ --training_dbPort $appdataTrainingDbPort$ --modeldata_dbType $modeldataTrainingDbType$ --modeldata_dbName $modeldataTrainingDbName$ --modeldata_dbHost $modeldataTrainingDbHost$ --modeldata_dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --numRecommendations $numRecommendations$ --modelSet false --recommendationTime $recommendationTime$ --evalid $evalid$"
],
"paramorder": [],
"engineinfoid": "itemrec",
@@ -338,12 +338,12 @@
"batchcommands": [
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.itemrec.knnitembased.DataPreparator --hdfs --dbType $appdataDbType$ --dbName $appdataDbName$ --dbHost $appdataDbHost$ --dbPort $appdataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.itemrec.knnitembased.KNNItemBased --hdfs --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --measureParam $measureParam$ --priorCountParam $priorCountParam$ --priorCorrelParam $priorCorrelParam$ --minNumRatersParam $minNumRatersParam$ --maxNumRatersParam $maxNumRatersParam$ --minIntersectionParam $minIntersectionParam$ --minNumRatedSimParam $minNumRatedSimParam$ --numRecommendations $numRecommendations$ --unseenOnly $unseenOnly$",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.itemrec.knnitembased.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$"
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.itemrec.knnitembased.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$ --recommendationTime $recommendationTime$"
],
"offlineevalcommands": [
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.itemrec.knnitembased.DataPreparator --hdfs --dbType $appdataTrainingDbType$ --dbName $appdataTrainingDbName$ --dbHost $appdataTrainingDbHost$ --dbPort $appdataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.itemrec.knnitembased.KNNItemBased --hdfs --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --measureParam $measureParam$ --priorCountParam $priorCountParam$ --priorCorrelParam $priorCorrelParam$ --minNumRatersParam $minNumRatersParam$ --maxNumRatersParam $maxNumRatersParam$ --minIntersectionParam $minIntersectionParam$ --minNumRatedSimParam $minNumRatedSimParam$ --numRecommendations $numRecommendations$ --unseenOnly $unseenOnly$",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.itemrec.knnitembased.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet false"
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.itemrec.knnitembased.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet false --recommendationTime $recommendationTime$"
],
"paramorder": [
"measureParam",
@@ -825,18 +825,24 @@
"batchcommands": [
"$base$/bin/quiet.sh $hadoop$ fs -rmr $mahoutTempDir$",
"$base$/bin/quiet.sh $hadoop$ fs -rmr $algoDir$",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataCopy --hdfs --dbType $appdataDbType$ --dbName $appdataDbName$ --dbHost $appdataDbHost$ --dbPort $appdataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataPreparator --hdfs --dbType $appdataDbType$ --dbName $appdataDbName$ --dbHost $appdataDbHost$ --dbPort $appdataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
- "$hadoop$ jar $mahoutCoreJob$ org.apache.mahout.cf.taste.hadoop.item.RecommenderJob --input $dataFilePrefix$ratings.csv --output $algoFilePrefix$predicted.tsv --tempDir $mahoutTempDir$ --numRecommendations $numRecommendations$ --booleanData $booleanData$ --maxPrefsPerUser $maxPrefsPerUser$ --minPrefsPerUser $minPrefsPerUser$ --maxSimilaritiesPerItem $maxSimilaritiesPerItem$ --maxPrefsPerUserInItemSimilarity $maxPrefsPerUserInItemSimilarity$ --similarityClassname $similarityClassname$ --threshold $threshold$",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$"
+ "$base$/bin/quiet.sh rm -rf $localTempDir$",
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataCopy --hdfs --dbType $appdataDbType$ --dbName $appdataDbName$ --dbHost $appdataDbHost$ --dbPort $appdataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$ --recommendationTime $recommendationTime$",
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataPreparator --hdfs --dbType $appdataDbType$ --dbName $appdataDbName$ --dbHost $appdataDbHost$ --dbPort $appdataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$ --recommendationTime $recommendationTime$",
+ "$hadoop$ fs -getmerge $dataFilePrefix$recommendItems.csv $localTempDir$recommendItemsM.csv",
+ "$hadoop$ fs -copyFromLocal $localTempDir$recommendItemsM.csv $dataFilePrefix$recommendItemsM.csv",
+ "$hadoop$ jar $mahoutCoreJob$ org.apache.mahout.cf.taste.hadoop.item.RecommenderJob --input $dataFilePrefix$ratings.csv --itemsFile $dataFilePrefix$recommendItemsM.csv --output $algoFilePrefix$predicted.tsv --tempDir $mahoutTempDir$ --numRecommendations $numRecommendations$ --booleanData $booleanData$ --maxPrefsPerUser $maxPrefsPerUser$ --minPrefsPerUser $minPrefsPerUser$ --maxSimilaritiesPerItem $maxSimilaritiesPerItem$ --maxPrefsPerUserInItemSimilarity $maxPrefsPerUserInItemSimilarity$ --similarityClassname $similarityClassname$ --threshold $threshold$",
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$ --recommendationTime $recommendationTime$ --booleanData $booleanData$"
],
"offlineevalcommands": [
"$base$/bin/quiet.sh $hadoop$ fs -rmr $mahoutTempDir$",
"$base$/bin/quiet.sh $hadoop$ fs -rmr $algoDir$",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataCopy --hdfs --dbType $appdataTrainingDbType$ --dbName $appdataTrainingDbName$ --dbHost $appdataTrainingDbHost$ --dbPort $appdataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataPreparator --hdfs --dbType $appdataTrainingDbType$ --dbName $appdataTrainingDbName$ --dbHost $appdataTrainingDbHost$ --dbPort $appdataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
- "$hadoop$ jar $mahoutCoreJob$ org.apache.mahout.cf.taste.hadoop.item.RecommenderJob --input $dataFilePrefix$ratings.csv --output $algoFilePrefix$predicted.tsv --tempDir $mahoutTempDir$ --numRecommendations $numRecommendations$ --booleanData $booleanData$ --maxPrefsPerUser $maxPrefsPerUser$ --minPrefsPerUser $minPrefsPerUser$ --maxSimilaritiesPerItem $maxSimilaritiesPerItem$ --maxPrefsPerUserInItemSimilarity $maxPrefsPerUserInItemSimilarity$ --similarityClassname $similarityClassname$ --threshold $threshold$",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$"
+ "$base$/bin/quiet.sh rm -rf $localTempDir$",
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataCopy --hdfs --dbType $appdataTrainingDbType$ --dbName $appdataTrainingDbName$ --dbHost $appdataTrainingDbHost$ --dbPort $appdataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$ --recommendationTime $recommendationTime$",
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataPreparator --hdfs --dbType $appdataTrainingDbType$ --dbName $appdataTrainingDbName$ --dbHost $appdataTrainingDbHost$ --dbPort $appdataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$ --recommendationTime $recommendationTime$",
+ "$hadoop$ fs -getmerge $dataFilePrefix$recommendItems.csv $localTempDir$recommendItemsM.csv",
+ "$hadoop$ fs -copyFromLocal $localTempDir$recommendItemsM.csv $dataFilePrefix$recommendItemsM.csv",
+ "$hadoop$ jar $mahoutCoreJob$ org.apache.mahout.cf.taste.hadoop.item.RecommenderJob --input $dataFilePrefix$ratings.csv --itemsFile $dataFilePrefix$recommendItemsM.csv --output $algoFilePrefix$predicted.tsv --tempDir $mahoutTempDir$ --numRecommendations $numRecommendations$ --booleanData $booleanData$ --maxPrefsPerUser $maxPrefsPerUser$ --minPrefsPerUser $minPrefsPerUser$ --maxSimilaritiesPerItem $maxSimilaritiesPerItem$ --maxPrefsPerUserInItemSimilarity $maxPrefsPerUserInItemSimilarity$ --similarityClassname $similarityClassname$ --threshold $threshold$",
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$ --recommendationTime $recommendationTime$ --booleanData $booleanData$"
],
"paramorder": [
"booleanData",
@@ -964,7 +970,7 @@
}
]
},
- "defaultvalue": "SIMILARITY_COOCCURRENCE"
+ "defaultvalue": "SIMILARITY_LOGLIKELIHOOD"
},
"threshold": {
"name": "Threshold",
@@ -1320,7 +1326,7 @@
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataPreparator --hdfs --dbType $appdataDbType$ --dbName $appdataDbName$ --dbHost $appdataDbHost$ --dbPort $appdataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $mahoutCoreJob$ org.apache.mahout.cf.taste.hadoop.als.ParallelALSFactorizationJob --input $dataFilePrefix$ratings.csv --output $algoFilePrefix$matrix --tempDir $mahoutTempDir$ --lambda $lambda$ --implicitFeedback $implicitFeedback$ --numFeatures $numFeatures$ --numIterations $numIterations$",
"$hadoop$ jar $mahoutCoreJob$ org.apache.mahout.cf.taste.hadoop.als.RecommenderJob --input $algoFilePrefix$matrix/userRatings --userFeatures $algoFilePrefix$matrix/U --itemFeatures $algoFilePrefix$matrix/M --output $algoFilePrefix$predicted.tsv --tempDir $mahoutTempDir$ --numRecommendations $numRecommendations$ --maxRating 5",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$"
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$ --recommendationTime $recommendationTime$ --implicitFeedback $implicitFeedback$"
],
"offlineevalcommands": [
"$base$/bin/quiet.sh $hadoop$ fs -rmr $mahoutTempDir$",
@@ -1329,7 +1335,7 @@
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataPreparator --hdfs --dbType $appdataTrainingDbType$ --dbName $appdataTrainingDbName$ --dbHost $appdataTrainingDbHost$ --dbPort $appdataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $mahoutCoreJob$ org.apache.mahout.cf.taste.hadoop.als.ParallelALSFactorizationJob --input $dataFilePrefix$ratings.csv --output $algoFilePrefix$matrix --tempDir $mahoutTempDir$ --lambda $lambda$ --implicitFeedback $implicitFeedback$ --numFeatures $numFeatures$ --numIterations $numIterations$",
"$hadoop$ jar $mahoutCoreJob$ org.apache.mahout.cf.taste.hadoop.als.RecommenderJob --input $algoFilePrefix$matrix/userRatings --userFeatures $algoFilePrefix$matrix/U --itemFeatures $algoFilePrefix$matrix/M --output $algoFilePrefix$predicted.tsv --tempDir $mahoutTempDir$ --numRecommendations $numRecommendations$ --maxRating 5",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$"
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$ --recommendationTime $recommendationTime$ --implicitFeedback $implicitFeedback$"
],
"paramorder": [
"lambda",
@@ -1724,13 +1730,13 @@
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataCopy --hdfs --dbType $appdataDbType$ --dbName $appdataDbName$ --dbHost $appdataDbHost$ --dbPort $appdataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataPreparator --hdfs --dbType $appdataDbType$ --dbName $appdataDbName$ --dbHost $appdataDbHost$ --dbPort $appdataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"java -Dio.prediction.base=$base$ $configFile$ -jar $base$/lib/$mahoutItemrecAlgo$ io.prediction.algorithms.mahout.itemrec.knnuserbased.KNNUserBasedJob --hdfsRoot $hdfsRoot$ --localTempRoot $localTempRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --booleanData $booleanData$ --numRecommendations $numRecommendations$ --nearestN $nearestN$ --userSimilarity $userSimilarity$ --weighted $weighted$ --minSimilarity $minSimilarity$ --samplingRate $samplingRate$",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$"
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$ --recommendationTime $recommendationTime$ --booleanData $booleanData$"
],
"offlineevalcommands": [
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataCopy --hdfs --dbType $appdataTrainingDbType$ --dbName $appdataTrainingDbName$ --dbHost $appdataTrainingDbHost$ --dbPort $appdataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataPreparator --hdfs --dbType $appdataTrainingDbType$ --dbName $appdataTrainingDbName$ --dbHost $appdataTrainingDbHost$ --dbPort $appdataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"java -Dio.prediction.base=$base$ $configFile$ -jar $base$/lib/$mahoutItemrecAlgo$ io.prediction.algorithms.mahout.itemrec.knnuserbased.KNNUserBasedJob --hdfsRoot $hdfsRoot$ --localTempRoot $localTempRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --booleanData $booleanData$ --numRecommendations $numRecommendations$ --nearestN $nearestN$ --userSimilarity $userSimilarity$ --weighted $weighted$ --minSimilarity $minSimilarity$ --samplingRate $samplingRate$",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$"
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$ --recommendationTime $recommendationTime$ --booleanData $booleanData$"
],
"paramorder": [
"booleanData",
@@ -2168,13 +2174,13 @@
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataCopy --hdfs --dbType $appdataDbType$ --dbName $appdataDbName$ --dbHost $appdataDbHost$ --dbPort $appdataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataPreparator --hdfs --dbType $appdataDbType$ --dbName $appdataDbName$ --dbHost $appdataDbHost$ --dbPort $appdataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"java -Dio.prediction.base=$base$ $configFile$ -jar $base$/lib/$mahoutItemrecAlgo$ io.prediction.algorithms.mahout.itemrec.thresholduserbased.ThresholdUserBasedJob --hdfsRoot $hdfsRoot$ --localTempRoot $localTempRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --booleanData $booleanData$ --numRecommendations $numRecommendations$ --threshold $threshold$ --userSimilarity $userSimilarity$ --weighted $weighted$ --samplingRate $samplingRate$",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$"
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$ --recommendationTime $recommendationTime$ --booleanData $booleanData$"
],
"offlineevalcommands": [
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataCopy --hdfs --dbType $appdataTrainingDbType$ --dbName $appdataTrainingDbName$ --dbHost $appdataTrainingDbHost$ --dbPort $appdataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataPreparator --hdfs --dbType $appdataTrainingDbType$ --dbName $appdataTrainingDbName$ --dbHost $appdataTrainingDbHost$ --dbPort $appdataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"java -Dio.prediction.base=$base$ $configFile$ -jar $base$/lib/$mahoutItemrecAlgo$ io.prediction.algorithms.mahout.itemrec.thresholduserbased.ThresholdUserBasedJob --hdfsRoot $hdfsRoot$ --localTempRoot $localTempRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --booleanData $booleanData$ --numRecommendations $numRecommendations$ --threshold $threshold$ --userSimilarity $userSimilarity$ --weighted $weighted$ --samplingRate $samplingRate$",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$"
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$ --recommendationTime $recommendationTime$ --booleanData $booleanData$"
],
"paramorder": [
"booleanData",
@@ -2579,13 +2585,13 @@
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataCopy --hdfs --dbType $appdataDbType$ --dbName $appdataDbName$ --dbHost $appdataDbHost$ --dbPort $appdataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataPreparator --hdfs --dbType $appdataDbType$ --dbName $appdataDbName$ --dbHost $appdataDbHost$ --dbPort $appdataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"java -Dio.prediction.base=$base$ $configFile$ -jar $base$/lib/$mahoutItemrecAlgo$ io.prediction.algorithms.mahout.itemrec.slopeone.SlopeOneJob --hdfsRoot $hdfsRoot$ --localTempRoot $localTempRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --numRecommendations $numRecommendations$ --weighting $weighting$",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$"
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$ --recommendationTime $recommendationTime$"
],
"offlineevalcommands": [
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataCopy --hdfs --dbType $appdataTrainingDbType$ --dbName $appdataTrainingDbName$ --dbHost $appdataTrainingDbHost$ --dbPort $appdataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataPreparator --hdfs --dbType $appdataTrainingDbType$ --dbName $appdataTrainingDbName$ --dbHost $appdataTrainingDbHost$ --dbPort $appdataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"java -Dio.prediction.base=$base$ $configFile$ -jar $base$/lib/$mahoutItemrecAlgo$ io.prediction.algorithms.mahout.itemrec.slopeone.SlopeOneJob --hdfsRoot $hdfsRoot$ --localTempRoot $localTempRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --numRecommendations $numRecommendations$ --weighting $weighting$",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$"
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$ --recommendationTime $recommendationTime$"
],
"paramorder": [
"weighting",
@@ -2850,13 +2856,13 @@
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataCopy --hdfs --dbType $appdataDbType$ --dbName $appdataDbName$ --dbHost $appdataDbHost$ --dbPort $appdataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataPreparator --hdfs --dbType $appdataDbType$ --dbName $appdataDbName$ --dbHost $appdataDbHost$ --dbPort $appdataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"java -Dio.prediction.base=$base$ $configFile$ -jar $base$/lib/$mahoutItemrecAlgo$ io.prediction.algorithms.mahout.itemrec.alswr.ALSWRJob --hdfsRoot $hdfsRoot$ --localTempRoot $localTempRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --numRecommendations $numRecommendations$ --numFeatures $numFeatures$ --lambda $lambda$ --numIterations $numIterations$",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$"
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$ --recommendationTime $recommendationTime$"
],
"offlineevalcommands": [
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataCopy --hdfs --dbType $appdataTrainingDbType$ --dbName $appdataTrainingDbName$ --dbHost $appdataTrainingDbHost$ --dbPort $appdataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataPreparator --hdfs --dbType $appdataTrainingDbType$ --dbName $appdataTrainingDbName$ --dbHost $appdataTrainingDbHost$ --dbPort $appdataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"java -Dio.prediction.base=$base$ $configFile$ -jar $base$/lib/$mahoutItemrecAlgo$ io.prediction.algorithms.mahout.itemrec.alswr.ALSWRJob --hdfsRoot $hdfsRoot$ --localTempRoot $localTempRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --numRecommendations $numRecommendations$ --numFeatures $numFeatures$ --lambda $lambda$ --numIterations $numIterations$",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$"
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$ --recommendationTime $recommendationTime$"
],
"paramorder": [
"numFeatures",
@@ -3193,13 +3199,13 @@
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataCopy --hdfs --dbType $appdataDbType$ --dbName $appdataDbName$ --dbHost $appdataDbHost$ --dbPort $appdataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataPreparator --hdfs --dbType $appdataDbType$ --dbName $appdataDbName$ --dbHost $appdataDbHost$ --dbPort $appdataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"java -Dio.prediction.base=$base$ $configFile$ -jar $base$/lib/$mahoutItemrecAlgo$ io.prediction.algorithms.mahout.itemrec.svdsgd.SVDSGDJob --hdfsRoot $hdfsRoot$ --localTempRoot $localTempRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --numRecommendations $numRecommendations$ --numFeatures $numFeatures$ --learningRate $learningRate$ --preventOverfitting $preventOverfitting$ --randomNoise $randomNoise$ --numIterations $numIterations$ --learningRateDecay $learningRateDecay$",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$"
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$ --recommendationTime $recommendationTime$"
],
"offlineevalcommands": [
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataCopy --hdfs --dbType $appdataTrainingDbType$ --dbName $appdataTrainingDbName$ --dbHost $appdataTrainingDbHost$ --dbPort $appdataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataPreparator --hdfs --dbType $appdataTrainingDbType$ --dbName $appdataTrainingDbName$ --dbHost $appdataTrainingDbHost$ --dbPort $appdataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"java -Dio.prediction.base=$base$ $configFile$ -jar $base$/lib/$mahoutItemrecAlgo$ io.prediction.algorithms.mahout.itemrec.svdsgd.SVDSGDJob --hdfsRoot $hdfsRoot$ --localTempRoot $localTempRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --numRecommendations $numRecommendations$ --numFeatures $numFeatures$ --learningRate $learningRate$ --preventOverfitting $preventOverfitting$ --randomNoise $randomNoise$ --numIterations $numIterations$ --learningRateDecay $learningRateDecay$",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$"
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$ --recommendationTime $recommendationTime$"
],
"paramorder": [
"numFeatures",
@@ -3635,13 +3641,13 @@
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataCopy --hdfs --dbType $appdataDbType$ --dbName $appdataDbName$ --dbHost $appdataDbHost$ --dbPort $appdataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataPreparator --hdfs --dbType $appdataDbType$ --dbName $appdataDbName$ --dbHost $appdataDbHost$ --dbPort $appdataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"java -Dio.prediction.base=$base$ $configFile$ -jar $base$/lib/$mahoutItemrecAlgo$ io.prediction.algorithms.mahout.itemrec.svdplusplus.SVDPlusPlusJob --hdfsRoot $hdfsRoot$ --localTempRoot $localTempRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --numRecommendations $numRecommendations$ --numFeatures $numFeatures$ --learningRate $learningRate$ --preventOverfitting $preventOverfitting$ --randomNoise $randomNoise$ --numIterations $numIterations$ --learningRateDecay $learningRateDecay$",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$"
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$ --recommendationTime $recommendationTime$"
],
"offlineevalcommands": [
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataCopy --hdfs --dbType $appdataTrainingDbType$ --dbName $appdataTrainingDbName$ --dbHost $appdataTrainingDbHost$ --dbPort $appdataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.DataPreparator --hdfs --dbType $appdataTrainingDbType$ --dbName $appdataTrainingDbName$ --dbHost $appdataTrainingDbHost$ --dbPort $appdataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"java -Dio.prediction.base=$base$ $configFile$ -jar $base$/lib/$mahoutItemrecAlgo$ io.prediction.algorithms.mahout.itemrec.svdplusplus.SVDPlusPlusJob --hdfsRoot $hdfsRoot$ --localTempRoot $localTempRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --numRecommendations $numRecommendations$ --numFeatures $numFeatures$ --learningRate $learningRate$ --preventOverfitting $preventOverfitting$ --randomNoise $randomNoise$ --numIterations $numIterations$ --learningRateDecay $learningRateDecay$",
- "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$"
+ "$hadoop$ jar $base$/lib/$pdioItemrecAlgo$ io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet $modelset$ --unseenOnly $unseenOnly$ --numRecommendations $numRecommendations$ --recommendationTime $recommendationTime$"
],
"paramorder": [
"numFeatures",
@@ -4074,10 +4080,10 @@
"name": "Random Rank",
"description": "Predict item similarities randomly.",
"batchcommands": [
- "$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.itemsim.randomrank.RandomRank --hdfs --training_dbType $appdataDbType$ --training_dbName $appdataDbName$ --training_dbHost $appdataDbHost$ --training_dbPort $appdataDbPort$ --modeldata_dbType $modeldataDbType$ --modeldata_dbName $modeldataDbName$ --modeldata_dbHost $modeldataDbHost$ --modeldata_dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --numSimilarItems $numSimilarItems$ --modelSet $modelset$"
+ "$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.itemsim.randomrank.RandomRank --hdfs --training_dbType $appdataDbType$ --training_dbName $appdataDbName$ --training_dbHost $appdataDbHost$ --training_dbPort $appdataDbPort$ --modeldata_dbType $modeldataDbType$ --modeldata_dbName $modeldataDbName$ --modeldata_dbHost $modeldataDbHost$ --modeldata_dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --numSimilarItems $numSimilarItems$ --modelSet $modelset$ --recommendationTime $recommendationTime$"
],
"offlineevalcommands": [
- "$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.itemsim.randomrank.RandomRank --hdfs --training_dbType $appdataTrainingDbType$ --training_dbName $appdataTrainingDbName$ --training_dbHost $appdataTrainingDbHost$ --training_dbPort $appdataTrainingDbPort$ --modeldata_dbType $modeldataTrainingDbType$ --modeldata_dbName $modeldataTrainingDbName$ --modeldata_dbHost $modeldataTrainingDbHost$ --modeldata_dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --numSimilarItems $numSimilarItems$ --modelSet false --evalid $evalid$"
+ "$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.itemsim.randomrank.RandomRank --hdfs --training_dbType $appdataTrainingDbType$ --training_dbName $appdataTrainingDbName$ --training_dbHost $appdataTrainingDbHost$ --training_dbPort $appdataTrainingDbPort$ --modeldata_dbType $modeldataTrainingDbType$ --modeldata_dbName $modeldataTrainingDbName$ --modeldata_dbHost $modeldataTrainingDbHost$ --modeldata_dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --numSimilarItems $numSimilarItems$ --modelSet false --evalid $evalid$ --recommendationTime $recommendationTime$"
],
"paramorder": [],
"engineinfoid": "itemsim",
@@ -4094,10 +4100,10 @@
"name": "Latest Rank",
"description": "Consider latest items as most similar.",
"batchcommands": [
- "$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.itemsim.latestrank.LatestRank --hdfs --training_dbType $appdataDbType$ --training_dbName $appdataDbName$ --training_dbHost $appdataDbHost$ --training_dbPort $appdataDbPort$ --modeldata_dbType $modeldataDbType$ --modeldata_dbName $modeldataDbName$ --modeldata_dbHost $modeldataDbHost$ --modeldata_dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --numSimilarItems $numSimilarItems$ --modelSet $modelset$"
+ "$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.itemsim.latestrank.LatestRank --hdfs --training_dbType $appdataDbType$ --training_dbName $appdataDbName$ --training_dbHost $appdataDbHost$ --training_dbPort $appdataDbPort$ --modeldata_dbType $modeldataDbType$ --modeldata_dbName $modeldataDbName$ --modeldata_dbHost $modeldataDbHost$ --modeldata_dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --numSimilarItems $numSimilarItems$ --modelSet $modelset$ --recommendationTime $recommendationTime$"
],
"offlineevalcommands": [
- "$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.itemsim.latestrank.LatestRank --hdfs --training_dbType $appdataTrainingDbType$ --training_dbName $appdataTrainingDbName$ --training_dbHost $appdataTrainingDbHost$ --training_dbPort $appdataTrainingDbPort$ --modeldata_dbType $modeldataTrainingDbType$ --modeldata_dbName $modeldataTrainingDbName$ --modeldata_dbHost $modeldataTrainingDbHost$ --modeldata_dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --numSimilarItems $numSimilarItems$ --modelSet false --evalid $evalid$"
+ "$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.itemsim.latestrank.LatestRank --hdfs --training_dbType $appdataTrainingDbType$ --training_dbName $appdataTrainingDbName$ --training_dbHost $appdataTrainingDbHost$ --training_dbPort $appdataTrainingDbPort$ --modeldata_dbType $modeldataTrainingDbType$ --modeldata_dbName $modeldataTrainingDbName$ --modeldata_dbHost $modeldataTrainingDbHost$ --modeldata_dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --numSimilarItems $numSimilarItems$ --modelSet false --evalid $evalid$ --recommendationTime $recommendationTime$"
],
"paramorder": [],
"engineinfoid": "itemsim",
@@ -4116,12 +4122,12 @@
"batchcommands": [
"$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.itemsim.itemsimcf.DataPreparator --hdfs --dbType $appdataDbType$ --dbName $appdataDbName$ --dbHost $appdataDbHost$ --dbPort $appdataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.itemsim.itemsimcf.ItemSimilarity --hdfs --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --measureParam $measureParam$ --priorCountParam $priorCountParam$ --priorCorrelParam $priorCorrelParam$ --minNumRatersParam $minNumRatersParam$ --maxNumRatersParam $maxNumRatersParam$ --minIntersectionParam $minIntersectionParam$ --numSimilarItems $numSimilarItems$",
- "$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.itemsim.itemsimcf.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$"
+ "$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.itemsim.itemsimcf.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$ --recommendationTime $recommendationTime$"
],
"offlineevalcommands": [
"$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.itemsim.itemsimcf.DataPreparator --hdfs --dbType $appdataTrainingDbType$ --dbName $appdataTrainingDbName$ --dbHost $appdataTrainingDbHost$ --dbPort $appdataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.itemsim.itemsimcf.ItemSimilarity --hdfs --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --measureParam $measureParam$ --priorCountParam $priorCountParam$ --priorCorrelParam $priorCorrelParam$ --minNumRatersParam $minNumRatersParam$ --maxNumRatersParam $maxNumRatersParam$ --minIntersectionParam $minIntersectionParam$ --numSimilarItems $numSimilarItems$",
- "$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.itemsim.itemsimcf.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet false"
+ "$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.itemsim.itemsimcf.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet false --recommendationTime $recommendationTime$"
],
"paramorder": [
"measureParam",
@@ -4573,7 +4579,7 @@
"$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.mahout.itemsim.DataCopy --hdfs --dbType $appdataDbType$ --dbName $appdataDbName$ --dbHost $appdataDbHost$ --dbPort $appdataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.mahout.itemsim.DataPreparator --hdfs --dbType $appdataDbType$ --dbName $appdataDbName$ --dbHost $appdataDbHost$ --dbPort $appdataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $mahoutCoreJob$ org.apache.mahout.cf.taste.hadoop.similarity.item.ItemSimilarityJob --input $dataFilePrefix$ratings.csv --output $algoFilePrefix$similarities.tsv --tempDir $mahoutTempDir$ --maxSimilaritiesPerItem $numSimilarItems$ --booleanData $booleanData$ --maxPrefsPerUser $maxPrefsPerUser$ --minPrefsPerUser $minPrefsPerUser$ --similarityClassname $similarityClassname$ --threshold $threshold$",
- "$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.mahout.itemsim.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$ --numSimilarItems $numSimilarItems$"
+ "$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.mahout.itemsim.ModelConstructor --hdfs --dbType $modeldataDbType$ --dbName $modeldataDbName$ --dbHost $modeldataDbHost$ --dbPort $modeldataDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --modelSet $modelset$ --numSimilarItems $numSimilarItems$ --recommendationTime $recommendationTime$"
],
"offlineevalcommands": [
"$base$/bin/quiet.sh $hadoop$ fs -rmr $mahoutTempDir$",
@@ -4581,7 +4587,7 @@
"$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.mahout.itemsim.DataCopy --hdfs --dbType $appdataTrainingDbType$ --dbName $appdataTrainingDbName$ --dbHost $appdataTrainingDbHost$ --dbPort $appdataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.mahout.itemsim.DataPreparator --hdfs --dbType $appdataTrainingDbType$ --dbName $appdataTrainingDbName$ --dbHost $appdataTrainingDbHost$ --dbPort $appdataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ $itypes$ --viewParam $viewParam$ --likeParam $likeParam$ --dislikeParam $dislikeParam$ --conversionParam $conversionParam$ --conflictParam $conflictParam$",
"$hadoop$ jar $mahoutCoreJob$ org.apache.mahout.cf.taste.hadoop.similarity.item.ItemSimilarityJob --input $dataFilePrefix$ratings.csv --output $algoFilePrefix$similarities.tsv --tempDir $mahoutTempDir$ --maxSimilaritiesPerItem $numSimilarItems$ --booleanData $booleanData$ --maxPrefsPerUser $maxPrefsPerUser$ --minPrefsPerUser $minPrefsPerUser$ --similarityClassname $similarityClassname$ --threshold $threshold$",
- "$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.mahout.itemsim.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet $modelset$ --numSimilarItems $numSimilarItems$"
+ "$hadoop$ jar $base$/lib/$pdioItemsimAlgo$ io.prediction.algorithms.scalding.mahout.itemsim.ModelConstructor --hdfs --dbType $modeldataTrainingDbType$ --dbName $modeldataTrainingDbName$ --dbHost $modeldataTrainingDbHost$ --dbPort $modeldataTrainingDbPort$ --hdfsRoot $hdfsRoot$ --appid $appid$ --engineid $engineid$ --algoid $algoid$ --evalid $evalid$ --modelSet $modelset$ --numSimilarItems $numSimilarItems$ --recommendationTime $recommendationTime$"
],
"paramorder": [
"booleanData",
@@ -4685,7 +4691,7 @@
}
]
},
- "defaultvalue": "SIMILARITY_COOCCURRENCE"
+ "defaultvalue": "SIMILARITY_LOGLIKELIHOOD"
},
"threshold": {
"name": "Threshold",
diff --git a/dist/conf/versions.json b/dist/conf/versions.json
index 0fb519b..6976cf1 100644
--- a/dist/conf/versions.json
+++ b/dist/conf/versions.json
@@ -1,5 +1,5 @@
{
- "latest": "0.6.0",
+ "latest": "0.6.8",
"versions": {
"0.5.0": {
"binaries": "http://download.prediction.io/PredictionIO-0.5.0.zip",
diff --git a/output/src/main/scala/io/prediction/output/itemrec/ItemRecAlgoOutput.scala b/output/src/main/scala/io/prediction/output/itemrec/ItemRecAlgoOutput.scala
index 4e90b51..3c77edf 100644
--- a/output/src/main/scala/io/prediction/output/itemrec/ItemRecAlgoOutput.scala
+++ b/output/src/main/scala/io/prediction/output/itemrec/ItemRecAlgoOutput.scala
@@ -15,9 +15,10 @@
object ItemRecAlgoOutput {
val config = new Config
- val items = config.getAppdataItems
def output(uid: String, n: Int, itypes: Option[Seq[String]], latlng: Option[Tuple2[Double, Double]], within: Option[Double], unit: Option[String])(implicit app: App, engine: Engine, algo: Algo, offlineEval: Option[OfflineEval] = None): Seq[String] = {
+ val items = offlineEval map { _ => config.getAppdataTrainingItems } getOrElse { config.getAppdataItems }
+
/** Serendipity settings. */
val serendipity = engine.params.get("serendipity").map { _.asInstanceOf[Int] }
diff --git a/output/src/main/scala/io/prediction/output/itemsim/ItemSimAlgoOutput.scala b/output/src/main/scala/io/prediction/output/itemsim/ItemSimAlgoOutput.scala
index 8b2559f..b1ab663 100644
--- a/output/src/main/scala/io/prediction/output/itemsim/ItemSimAlgoOutput.scala
+++ b/output/src/main/scala/io/prediction/output/itemsim/ItemSimAlgoOutput.scala
@@ -14,9 +14,10 @@
object ItemSimAlgoOutput {
val config = new Config
- val items = config.getAppdataItems
def output(iid: String, n: Int, itypes: Option[Seq[String]], latlng: Option[Tuple2[Double, Double]], within: Option[Double], unit: Option[String])(implicit app: App, engine: Engine, algo: Algo, offlineEval: Option[OfflineEval] = None): Seq[String] = {
+ val items = offlineEval map { _ => config.getAppdataTrainingItems } getOrElse { config.getAppdataItems }
+
/** Serendipity settings. */
val serendipity = engine.params.get("serendipity").map { _.asInstanceOf[Int] }
diff --git a/process/commons/hadoop/scalding/src/main/java/com/clojurewekz/cascading/mongodb/MongoDBScheme.java b/process/commons/hadoop/scalding/src/main/java/com/clojurewekz/cascading/mongodb/MongoDBScheme.java
index 6217642..afde791 100644
--- a/process/commons/hadoop/scalding/src/main/java/com/clojurewekz/cascading/mongodb/MongoDBScheme.java
+++ b/process/commons/hadoop/scalding/src/main/java/com/clojurewekz/cascading/mongodb/MongoDBScheme.java
@@ -166,7 +166,7 @@
if (tupleEntry != null) {
result.add(tupleEntry);
} else if (columnFieldName != this.keyColumnName) {
- result.add("");
+ result.add(null);
}
}
@@ -217,7 +217,7 @@
dbObject.put(columnFieldName, tupleEntryValue);
}
}
- logger.info("Putting key for output: {} {}", key, dbObject);
+ //logger.info("Putting key for output: {} {}", key, dbObject);
outputCollector.collect(key, new BSONWritable(dbObject));
}
diff --git a/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/ItemsSource.scala b/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/ItemsSource.scala
index feb5620..0e89da0 100644
--- a/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/ItemsSource.scala
+++ b/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/ItemsSource.scala
@@ -24,10 +24,13 @@
def readData(iidField: Symbol, itypesField: Symbol)(implicit fd: FlowDef): Pipe
/**
- * starttimeField: Symbol of starttime(String)
+ * iidField: Symbol of iid(String)
+ * itypesField: Symbol of itypes(List[String])
+ * starttimeField: Symbol of starttime(Long)
+ * endtimeField: Symbol of endtime(Option[Long])
*/
- def readStarttime(iidField: Symbol, itypesField: Symbol, starttimeField: Symbol)(implicit fd: FlowDef): Pipe = {
- throw new RuntimeException("ItemsSource readStarttime is not implemented.")
+ def readStartEndtime(iidField: Symbol, itypesField: Symbol, starttimeField: Symbol, endtimeField: Symbol)(implicit fd: FlowDef): Pipe = {
+ throw new RuntimeException("ItemsSource readStartEndtime is not implemented.")
}
/**
@@ -63,6 +66,7 @@
("appid" -> 'appid),
("ct" -> 'ct),
("itypes" -> 'itypes),
- ("starttime" -> 'starttime))
+ ("starttime" -> 'starttime),
+ ("endtime" -> 'endtime)) // optional
}
\ No newline at end of file
diff --git a/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/U2iActionsSource.scala b/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/U2iActionsSource.scala
index 8747c0c..e5f7d39 100644
--- a/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/U2iActionsSource.scala
+++ b/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/U2iActionsSource.scala
@@ -22,7 +22,7 @@
* uidField: Symbol of uid(String)
* iidField: Symbol of iid(String)
* tField: Symbol of t(String)
- * vField: Symbol of v(String)
+ * vField: Symbol of v(Option[String])
*/
def readData(actionField: Symbol, uidField: Symbol, iidField: Symbol, tField: Symbol, vField: Symbol)(implicit fd: FlowDef): Pipe
@@ -32,7 +32,7 @@
* uidField: Symbol of uid(String)
* iidField: Symbol of iid(String)
* tField: Symbol of t(String)
- * vField: Symbol of v(String)
+ * vField: Symbol of v(Option[String])
* appid: App ID(Int)
* p: Pipe. the data pipe.
*/
diff --git a/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/file/FileItemsSource.scala b/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/file/FileItemsSource.scala
index 35169ee..1e3d9f3 100644
--- a/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/file/FileItemsSource.scala
+++ b/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/file/FileItemsSource.scala
@@ -14,10 +14,14 @@
/**
* File Format:
- * <id>\t<itypes>\t<appid>\t<starttime>\t<ct>
+ * <id>\t<itypes>\t<appid>\t<starttime>\t<ct>\t<endtime>
+ *
+ * endtime is optional
+ * use PIO_NONE if no value for optional field
*
* Example:
- * 1 t1,t2,t3 4 123456 123210
+ * 1 t1,t2,t3 4 123456 123210 654321
+ * 1 t1,t2,t3 4 123456 123210 PIO_NONE
*/
class FileItemsSource(path: String, appId: Int, itypes: Option[List[String]]) extends Tsv(
p = path + "items.tsv"
@@ -54,12 +58,26 @@
dataPipe
}
- override def readStarttime(iidField: Symbol, itypesField: Symbol, starttimeField: Symbol)(implicit fd: FlowDef): Pipe = {
+ override def readStartEndtime(iidField: Symbol, itypesField: Symbol, starttimeField: Symbol, endtimeField: Symbol)(implicit fd: FlowDef): Pipe = {
this.read
- .mapTo((0, 1, 3) -> (iidField, itypesField, starttimeField)) { fields: (String, String, String) =>
- val (iid, itypes, starttime) = fields
+ .mapTo((0, 1, 3, 5) -> (iidField, itypesField, starttimeField, endtimeField)) { fields: (String, String, Long, String) =>
+ val (iid, itypes, starttime, endtime) = fields
- (iid, itypes.split(",").toList, starttime)
+ val endtimeOpt: Option[Long] = endtime match {
+ case "PIO_NONE" => None
+ case x: String => {
+ try {
+ Some(x.toLong)
+ } catch {
+ case e: Exception => {
+ assert(false, s"Failed to convert ${x} to Long. Exception: " + e)
+ Some(0)
+ }
+ }
+ }
+ }
+
+ (iid, itypes.split(",").toList, starttime, endtimeOpt)
}.then(filterItypes('itypes, itypes) _)
}
diff --git a/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/file/FileU2iActionsSource.scala b/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/file/FileU2iActionsSource.scala
index c930460..f326e6e 100644
--- a/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/file/FileU2iActionsSource.scala
+++ b/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/file/FileU2iActionsSource.scala
@@ -13,8 +13,12 @@
* File Format:
* <action>\t<uid>\t<iid>\t<t>\t<v>
*
+ * v is optional
+ * use PIO_NONE if no value for optional field
+ *
* Example:
* rate u2 i13 123456 4
+ * view u2 i13 234567 PIO_NONE
*/
class FileU2iActionsSource(path: String, appId: Int) extends Tsv(
p = path + "u2iActions.tsv"
@@ -30,17 +34,24 @@
fields: (String, String, String, String, String) =>
val (action, uid, iid, t, v) = fields
- (action, uid, iid, t, v)
+ val vOpt: Option[String] = v match {
+ case "PIO_NONE" => None
+ case x: String => Some(x)
+ }
+
+ (action, uid, iid, t, vOpt)
}
}
override def writeData(actionField: Symbol, uidField: Symbol, iidField: Symbol, tField: Symbol, vField: Symbol, appid: Int)(p: Pipe)(implicit fd: FlowDef): Pipe = {
val writtenData = p.mapTo((actionField, uidField, iidField, tField, vField) ->
(FIELD_SYMBOLS("action"), FIELD_SYMBOLS("uid"), FIELD_SYMBOLS("iid"), FIELD_SYMBOLS("t"), FIELD_SYMBOLS("v"), FIELD_SYMBOLS("appid"))) {
- fields: (String, String, String, String, String) =>
+ fields: (String, String, String, String, Option[String]) =>
val (action, uid, iid, t, v) = fields
- (action, uid, iid, t, v.toInt, appid)
+ val vData: String = v.getOrElse("PIO_NONE")
+
+ (action, uid, iid, t, vData, appid)
}.write(this)
writtenData
diff --git a/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/monogodb/MongoItemsSource.scala b/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/monogodb/MongoItemsSource.scala
index 3f956e5..ec8b4de 100644
--- a/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/monogodb/MongoItemsSource.scala
+++ b/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/monogodb/MongoItemsSource.scala
@@ -33,6 +33,7 @@
itemsCols.add("appid") // 2
itemsCols.add("starttime") // 3
itemsCols.add("ct") // 4
+ itemsCols.add("endtime") // 5 optional
itemsCols
},
@@ -44,6 +45,7 @@
itemsMappings.put("appid", FIELD_SYMBOLS("appid").name)
itemsMappings.put("starttime", FIELD_SYMBOLS("starttime").name)
itemsMappings.put("ct", FIELD_SYMBOLS("ct").name)
+ //itemsMappings.put("endtime", FIELD_SYMBOLS("endtime").name) // optional
itemsMappings
},
@@ -70,14 +72,16 @@
items
}
- override def readStarttime(iidField: Symbol, itypesField: Symbol, starttimeField: Symbol)(implicit fd: FlowDef): Pipe = {
+ override def readStartEndtime(iidField: Symbol, itypesField: Symbol, starttimeField: Symbol, endtimeField: Symbol)(implicit fd: FlowDef): Pipe = {
val items = this.read
- .mapTo((0, 1, 3) -> (iidField, itypesField, starttimeField)) { fields: (String, BasicDBList, java.util.Date) =>
+ .mapTo((0, 1, 3, 5) -> (iidField, itypesField, starttimeField, endtimeField)) { fields: (String, BasicDBList, java.util.Date, java.util.Date) =>
//val dt = new DateTime(fields._3)
+ val starttime: Long = fields._3.getTime()
+ val endtimeOpt: Option[Long] = Option(fields._4).map(_.getTime()) // NOTE: become None if fields._4 is null
// NOTE: convert itypes form BasicDBList to scala List.
- (fields._1, fields._2.toList, fields._3.getTime().toString)
+ (fields._1, fields._2.toList, starttime, endtimeOpt)
}
items
@@ -94,7 +98,7 @@
ct = new DateTime(ct),
itypes = itypes.toList.map(x => x.toString),
starttime = Some(new DateTime(starttime)),
- endtime = None,
+ endtime = None, // TODO: endtime Option(endtime).map(x => new DateTime(x)),
price = None,
profit = None,
latlng = None,
@@ -136,6 +140,7 @@
val starttime: java.util.Date = obj.starttime.get.toDate()
val ct: java.util.Date = obj.ct.toDate()
+ // TODO: write endtime
(obj.id, itypesTuple, obj.appid, starttime, ct)
}.write(this)
diff --git a/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/monogodb/MongoU2iActionsSource.scala b/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/monogodb/MongoU2iActionsSource.scala
index db69a72..c54a60d 100644
--- a/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/monogodb/MongoU2iActionsSource.scala
+++ b/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/monogodb/MongoU2iActionsSource.scala
@@ -89,8 +89,9 @@
val (action, uid, iid, t, v) = fields
//val dt = new DateTime(t)
+ val vOpt: Option[String] = Option(v)
- (action, uid, iid, t.getTime().toString, v)
+ (action, uid, iid, t.getTime().toString, vOpt)
}
u2iactions
@@ -99,10 +100,12 @@
override def writeData(actionField: Symbol, uidField: Symbol, iidField: Symbol, tField: Symbol, vField: Symbol, appid: Int)(p: Pipe)(implicit fd: FlowDef): Pipe = {
val dbData = p.mapTo((actionField, uidField, iidField, tField, vField) ->
(FIELD_SYMBOLS("action"), FIELD_SYMBOLS("uid"), FIELD_SYMBOLS("iid"), FIELD_SYMBOLS("t"), FIELD_SYMBOLS("v"), FIELD_SYMBOLS("appid"))) {
- fields: (String, String, String, String, String) =>
+ fields: (String, String, String, String, Option[String]) =>
val (action, uid, iid, t, v) = fields
- (action, uid, iid, new java.util.Date(t.toLong), v.toInt, appid)
+ val vData: String = v.getOrElse(null) // use null when the record has no v field
+
+ (action, uid, iid, new java.util.Date(t.toLong), vData, appid)
}.write(this)
dbData
diff --git a/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/monogodb/examples/ReadWrite.scala b/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/monogodb/examples/ReadWrite.scala
index e8abdc7..5c2f19c 100644
--- a/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/monogodb/examples/ReadWrite.scala
+++ b/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/monogodb/examples/ReadWrite.scala
@@ -53,7 +53,7 @@
val items = itemsSource.readData('iid, 'itypes)
.write(Tsv("items.tsv"))
- val itemsStarttime = itemsSource.readStarttime('iid, 'itypes, 'starttime)
+ val itemsStarttime = itemsSource.readStartEndtime('iid, 'itypes, 'starttime, 'endtime)
.write(Tsv("itemsStarttime.tsv"))
val itemsObj = itemsSource.readObj('item)
diff --git a/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/monogodb/examples/TestSchemaless.scala b/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/monogodb/examples/TestSchemaless.scala
new file mode 100644
index 0000000..ba7c5c7
--- /dev/null
+++ b/process/commons/hadoop/scalding/src/main/scala/io/prediction/commons/scalding/appdata/monogodb/examples/TestSchemaless.scala
@@ -0,0 +1,46 @@
+package io.prediction.commons.scalding.appdata.mongodb.examples
+
+import com.twitter.scalding._
+
+import io.prediction.commons.scalding.appdata.mongodb.{ MongoUsersSource, MongoItemsSource, MongoU2iActionsSource }
+import io.prediction.commons.appdata.{ Item, User }
+
+class TestSchemaless(args: Args) extends Job(args) {
+
+ val read_dbNameArg = args("read_dbName")
+ val read_dbHostArg = args("read_dbHost")
+ val read_dbPortArg = args("read_dbPort").toInt
+
+ val read_appidArg = args("read_appid").toInt
+
+ val preItypesArg = args.list("itypes")
+ val itypesArg: Option[List[String]] = if (preItypesArg.mkString(",").length == 0) None else Option(preItypesArg)
+
+ val itemsSource = new MongoItemsSource(read_dbNameArg, read_dbHostArg, read_dbPortArg, read_appidArg, itypesArg)
+
+ val itemsStarttime = itemsSource.readStartEndtime('iid, 'itypes, 'starttime, 'endtime)
+ .mapTo(('iid, 'itypes, 'starttime, 'endtime) -> ('iid, 'itypes, 'starttime, 'endtime)) {
+ fields: (String, List[String], Long, Option[Long]) =>
+ (fields._1, fields._2.mkString(","), fields._3, fields._4.getOrElse("PIO_NONE"))
+ }
+ .write(Tsv("itemsStarttime.tsv"))
+
+}
+
+class TestSchemaless2(args: Args) extends Job(args) {
+
+ Tsv("itemsStarttime.tsv").read
+ .mapTo((0, 1, 2, 3) -> ('id, 'itypes, 'starttime, 'endtime)) {
+ fields: (String, String, Long, String) =>
+ val endtime: Option[Long] = fields._4 match {
+ case "PIO_NONE" => None
+ case x: String => Some(x.toLong)
+ }
+
+ (fields._1, fields._2, fields._3, endtime)
+ }
+ .write(Tsv("itemsStarttime2.tsv"))
+ .filter('endtime) { x: Option[Long] => x != None }
+ .write(Tsv("itemsStarttime3.tsv"))
+
+}
\ No newline at end of file
diff --git a/process/commons/hadoop/scalding/src/test/scala/io/prediction/commons/scalding/appdata/file/FileItemsSourceTest.scala b/process/commons/hadoop/scalding/src/test/scala/io/prediction/commons/scalding/appdata/file/FileItemsSourceTest.scala
index bee565b..ceddca8 100644
--- a/process/commons/hadoop/scalding/src/test/scala/io/prediction/commons/scalding/appdata/file/FileItemsSourceTest.scala
+++ b/process/commons/hadoop/scalding/src/test/scala/io/prediction/commons/scalding/appdata/file/FileItemsSourceTest.scala
@@ -27,14 +27,14 @@
(iid, itypes.mkString(","))
}.write(Tsv("output"))
- src.readStarttime('iid, 'itypes, 'starttime)
- .mapTo(('iid, 'itypes, 'starttime) -> ('iid, 'itypes, 'starttime)) { fields: (String, List[String], String) =>
- val (iid, itypes, starttime) = fields
+ src.readStartEndtime('iid, 'itypes, 'starttime, 'endtime)
+ .mapTo(('iid, 'itypes, 'starttime, 'endtime) -> ('iid, 'itypes, 'starttime, 'endtime)) { fields: (String, List[String], Long, Option[Long]) =>
+ val (iid, itypes, starttime, endtime) = fields
// during read, itypes are converted from t1,t2,t3 to List[String] = List(t1,t2,t3)
// convert the List back to string with ',' as separator
- (iid, itypes.mkString(","), starttime)
- }.write(Tsv("outputStarttime"))
+ (iid, itypes.mkString(","), starttime.toString, endtime.map(_.toString).getOrElse("PIO_NONE"))
+ }.write(Tsv("outputStartEndtime"))
val writeDataSink = new FileItemsSource("writeDataTestpath", appidArg, None)
@@ -50,33 +50,33 @@
class FileItemsSourceTest extends Specification with TupleConversions {
val test1Input = List(
- ("i0", "t1,t2,t3", "appid", "2293300", "1266673"),
- ("i1", "t2,t3", "appid", "14526361", "12345135"),
- ("i2", "t4", "appid", "14526361", "23423424"),
- ("i3", "t3,t4", "appid", "1231415", "378462511"))
+ ("i0", "t1,t2,t3", "appid", "2293300", "1266673", "666554320"),
+ ("i1", "t2,t3", "appid", "14526361", "12345135", "PIO_NONE"),
+ ("i2", "t4", "appid", "14526361", "23423424", "PIO_NONE"),
+ ("i3", "t3,t4", "appid", "1231415", "378462511", "666554323"))
val test1output_all = test1Input
val test1output_t4 = List(
- ("i2", "t4", "appid", "14526361", "23423424"),
- ("i3", "t3,t4", "appid", "1231415", "378462511"))
+ ("i2", "t4", "appid", "14526361", "23423424", "PIO_NONE"),
+ ("i3", "t3,t4", "appid", "1231415", "378462511", "666554323"))
val test1output_t2t3 = List(
- ("i0", "t1,t2,t3", "appid", "2293300", "1266673"),
- ("i1", "t2,t3", "appid", "14526361", "12345135"),
- ("i3", "t3,t4", "appid", "1231415", "378462511"))
+ ("i0", "t1,t2,t3", "appid", "2293300", "1266673", "666554320"),
+ ("i1", "t2,t3", "appid", "14526361", "12345135", "PIO_NONE"),
+ ("i3", "t3,t4", "appid", "1231415", "378462511", "666554323"))
val test1output_none = List()
def testWithItypes(appid: Int, writeAppid: Int, itypes: List[String],
- inputItems: List[(String, String, String, String, String)],
- outputItems: List[(String, String, String, String, String)]) = {
+ inputItems: List[(String, String, String, String, String, String)],
+ outputItems: List[(String, String, String, String, String, String)]) = {
- val inputSource = inputItems map { case (id, itypes, tempAppid, starttime, ct) => (id, itypes, appid.toString, starttime, ct) }
- val outputExpected = outputItems map { case (id, itypes, tempAppid, starttime, ct) => (id, itypes) }
- val outputStarttimeExpected = outputItems map { case (id, itypes, tempAppid, starttime, ct) => (id, itypes, starttime) }
- val writeDataExpected = outputItems map { case (id, itypes, tempAppid, starttime, ct) => (id, itypes, writeAppid.toString) }
- val writeObjExpected = outputItems map { case (id, itypes, tempAppid, starttime, ct) => (id, itypes, appid.toString, starttime, ct) }
+ val inputSource = inputItems map { case (id, itypes, tempAppid, starttime, ct, endtime) => (id, itypes, appid.toString, starttime, ct, endtime) }
+ val outputExpected = outputItems map { case (id, itypes, tempAppid, starttime, ct, endtime) => (id, itypes) }
+ val outputStartEndtimeExpected = outputItems map { case (id, itypes, tempAppid, starttime, ct, endtime) => (id, itypes, starttime, endtime) }
+ val writeDataExpected = outputItems map { case (id, itypes, tempAppid, starttime, ct, endtime) => (id, itypes, writeAppid.toString) }
+ val writeObjExpected = outputItems map { case (id, itypes, tempAppid, starttime, ct, endtime) => (id, itypes, appid.toString, starttime, ct) }
JobTest("io.prediction.commons.scalding.appdata.file.ReadItypesTestJob")
.arg("appid", appid.toString)
@@ -88,9 +88,9 @@
outputBuffer must containTheSameElementsAs(outputExpected)
}
}
- .sink[(String, String, String)](Tsv("outputStarttime")) { outputBuffer =>
+ .sink[(String, String, String, String)](Tsv("outputStartEndtime")) { outputBuffer =>
"correctly read starttime" in {
- outputBuffer must containTheSameElementsAs(outputStarttimeExpected)
+ outputBuffer must containTheSameElementsAs(outputStartEndtimeExpected)
}
}
.sink[(String, String, String)]((new FileItemsSource("writeDataTestpath", appid, None)).getSource) { outputBuffer =>
diff --git a/process/commons/hadoop/scalding/src/test/scala/io/prediction/commons/scalding/appdata/file/FileU2iActionsSourceTest.scala b/process/commons/hadoop/scalding/src/test/scala/io/prediction/commons/scalding/appdata/file/FileU2iActionsSourceTest.scala
index 54cb505..76b5742 100644
--- a/process/commons/hadoop/scalding/src/test/scala/io/prediction/commons/scalding/appdata/file/FileU2iActionsSourceTest.scala
+++ b/process/commons/hadoop/scalding/src/test/scala/io/prediction/commons/scalding/appdata/file/FileU2iActionsSourceTest.scala
@@ -14,15 +14,27 @@
src.readData('action, 'uid, 'iid, 't, 'v)
.mapTo(('action, 'uid, 'iid, 't, 'v) -> ('action, 'uid, 'iid, 't, 'v)) {
- fields: (String, String, String, String, String) =>
- fields
+ fields: (String, String, String, String, Option[String]) =>
+ val (action, uid, iid, t, v) = fields
- fields
+ (action, uid, iid, t, v.getOrElse("PIO_NONE"))
}
.write(Tsv("output"))
}
+class WriteU2iActionsTestJob(args: Args) extends Job(args) {
+
+ val appidArg: Int = args("appid").toInt
+
+ val src = new FileU2iActionsSource("testpath", appidArg)
+ val sink = new FileU2iActionsSource("testpathwr", appidArg)
+
+ src.readData('action, 'uid, 'iid, 't, 'v)
+ .then(sink.writeData('action, 'uid, 'iid, 't, 'v, appidArg) _)
+
+}
+
class FileU2iActionsSourceTest extends Specification with TupleConversions {
// action: String// 0
// uid: String // 1
@@ -30,17 +42,28 @@
// t: String // 3
// v: String // 4
- val test1Input = List(("rate", "uid3", "iid5", "12345", "5"), ("view", "uid2", "iid6", "12346", "2"))
+ val test1Input = List(("rate", "uid3", "iid5", "12345", "5"), ("view", "uid2", "iid6", "12346", "PIO_NONE"))
+ val appid = 1
"ReadU2iActionsTest" should {
JobTest("io.prediction.commons.scalding.appdata.file.ReadU2iActionsTestJob")
- .arg("appid", "1")
- .source(new FileU2iActionsSource("testpath", 1), test1Input)
+ .arg("appid", appid.toString)
+ .source(new FileU2iActionsSource("testpath", appid), test1Input)
.sink[(String, String, String, String, String)](Tsv("output")) { outputBuffer =>
"correctly read from a file" in {
outputBuffer must containTheSameElementsAs(test1Input)
}
}.run.finish
+ }
+ "WriteU2iActionsTest" should {
+ JobTest("io.prediction.commons.scalding.appdata.file.WriteU2iActionsTestJob")
+ .arg("appid", appid.toString)
+ .source(new FileU2iActionsSource("testpath", appid), test1Input)
+ .sink[(String, String, String, String, String)]((new FileU2iActionsSource("testpathwr", appid)).getSource) { outputBuffer =>
+ "correctly write to a file" in {
+ outputBuffer must containTheSameElementsAs(test1Input)
+ }
+ }.run.finish
}
}
\ No newline at end of file
diff --git a/process/engines/commons/evaluations/hadoop/scalding/u2itrainingtestsplit/src/main/scala/io/prediction/evaluations/scalding/commons/u2itrainingtestsplit/U2ITrainingTestSplitTimePrep.scala b/process/engines/commons/evaluations/hadoop/scalding/u2itrainingtestsplit/src/main/scala/io/prediction/evaluations/scalding/commons/u2itrainingtestsplit/U2ITrainingTestSplitTimePrep.scala
index 59203c0..10b16a8 100644
--- a/process/engines/commons/evaluations/hadoop/scalding/u2itrainingtestsplit/src/main/scala/io/prediction/evaluations/scalding/commons/u2itrainingtestsplit/U2ITrainingTestSplitTimePrep.scala
+++ b/process/engines/commons/evaluations/hadoop/scalding/u2itrainingtestsplit/src/main/scala/io/prediction/evaluations/scalding/commons/u2itrainingtestsplit/U2ITrainingTestSplitTimePrep.scala
@@ -79,18 +79,19 @@
val selectedU2i = u2iSource.readData('action, 'uid, 'iid, 't, 'v)
.joinWithSmaller('iid -> 'iidx, itemsIidx) // only select actions of these items
- .map(('uid, 'iid, 'action, 'v) -> ('newUid, 'newIid, 'newV)) { fields: (String, String, String, String) =>
+ .map(('uid, 'iid, 'action, 'v) -> ('newUid, 'newIid, 'newV)) { fields: (String, String, String, Option[String]) =>
// NOTE: replace appid prefix by evalid
val (uid, iid, action, v) = fields
val newUid = replacePrefix(uid)
val newIid = replacePrefix(iid)
+ /* TODO remove
// NOTE: add default value 0 for non-rate acitons to work around the optional v field issue
// (cascading-mongo tap can't take optional field yet).
- val newV = if (v == "") "0" else v
-
- (newUid, newIid, newV)
+ val newV = if (v == "") "0" else v */
+
+ (newUid, newIid, v)
}
selectedU2i.then( u2iSink.writeData('action, 'newUid, 'newIid, 't, 'newV, evalidArg) _ ) // NOTE: appid is replaced by evalid
diff --git a/process/engines/commons/evaluations/hadoop/scalding/u2itrainingtestsplit/src/test/scala/io/prediction/evaluations/scalding/commons/u2itrainingtestsplit/U2ITrainingTestSplitTimeTest.scala b/process/engines/commons/evaluations/hadoop/scalding/u2itrainingtestsplit/src/test/scala/io/prediction/evaluations/scalding/commons/u2itrainingtestsplit/U2ITrainingTestSplitTimeTest.scala
index c5f6ab9..2d19565 100644
--- a/process/engines/commons/evaluations/hadoop/scalding/u2itrainingtestsplit/src/test/scala/io/prediction/evaluations/scalding/commons/u2itrainingtestsplit/U2ITrainingTestSplitTimeTest.scala
+++ b/process/engines/commons/evaluations/hadoop/scalding/u2itrainingtestsplit/src/test/scala/io/prediction/evaluations/scalding/commons/u2itrainingtestsplit/U2ITrainingTestSplitTimeTest.scala
@@ -252,8 +252,8 @@
("4", appid+"_u2", appid+"_i3", "1234504", "3"),
("4", appid+"_u1", appid+"_i3", "1234505", "3"),
("4", appid+"_u0", appid+"_i3", "1234509", "3"),
- ("view", appid+"_u0", appid+"_i0", "1234509", ""), // test missing v field case (non-rate action)
- ("like", appid+"_u1", appid+"_i2", "1234509", "")) // test missing v field case (non-rate action)
+ ("view", appid+"_u0", appid+"_i0", "1234509", "PIO_NONE"), // test missing v field case (non-rate action)
+ ("like", appid+"_u1", appid+"_i2", "1234509", "PIO_NONE")) // test missing v field case (non-rate action)
val selectedUsers = List(
(evalid+"_u0", evalid.toString, "123456"),
@@ -282,8 +282,8 @@
("4", evalid+"_u2", evalid+"_i3", "1234504", "3"),
("4", evalid+"_u1", evalid+"_i3", "1234505", "3"),
("4", evalid+"_u0", evalid+"_i3", "1234509", "3"),
- ("view", evalid+"_u0", evalid+"_i0", "1234509", "0"),
- ("like", evalid+"_u1", evalid+"_i2", "1234509", "0"))
+ ("view", evalid+"_u0", evalid+"_i0", "1234509", "PIO_NONE"),
+ ("like", evalid+"_u1", evalid+"_i2", "1234509", "PIO_NONE"))
"U2ITrainingTestSplitTimeTest with timeorder=true" should {
test(List(""), 0.4, 0.3, 0.2, true, appid, evalid,
diff --git a/process/engines/itemrec/algorithms/hadoop/scalding/generic/src/main/scala/io/prediction/algorithms/scalding/itemrec/generic/DataPreparator.scala b/process/engines/itemrec/algorithms/hadoop/scalding/generic/src/main/scala/io/prediction/algorithms/scalding/itemrec/generic/DataPreparator.scala
index ee76a7b..f416744 100644
--- a/process/engines/itemrec/algorithms/hadoop/scalding/generic/src/main/scala/io/prediction/algorithms/scalding/itemrec/generic/DataPreparator.scala
+++ b/process/engines/itemrec/algorithms/hadoop/scalding/generic/src/main/scala/io/prediction/algorithms/scalding/itemrec/generic/DataPreparator.scala
@@ -114,7 +114,7 @@
// get items data
val items = Items(appId = trainingAppid, itypes = itypesArg,
- dbType = dbTypeArg, dbName = dbNameArg, dbHost = dbHostArg, dbPort = dbPortArg).readData('iidx, 'itypes)
+ dbType = dbTypeArg, dbName = dbNameArg, dbHost = dbHostArg, dbPort = dbPortArg).readStartEndtime('iidx, 'itypes, 'starttime, 'endtime)
val u2i = U2iActions(appId = trainingAppid,
dbType = dbTypeArg, dbName = dbNameArg, dbHost = dbHostArg, dbPort = dbPortArg).readData('action, 'uid, 'iid, 't, 'v)
@@ -132,7 +132,7 @@
* computation
*/
u2i.joinWithSmaller('iid -> 'iidx, items) // only select actions of these items
- .filter('action, 'v) { fields: (String, String) =>
+ .filter('action, 'v) { fields: (String, Option[String]) =>
val (action, v) = fields
val keepThis: Boolean = action match {
@@ -148,12 +148,19 @@
}
keepThis
}
- .map(('action, 'v, 't) -> ('rating, 'tLong)) { fields: (String, String, String) =>
+ .map(('action, 'v, 't) -> ('rating, 'tLong)) { fields: (String, Option[String], String) =>
val (action, v, t) = fields
// convert actions into rating value based on "action" and "v" fields
val rating: Int = action match {
- case ACTION_RATE => v.toInt
+ case ACTION_RATE => try {
+ v.get.toInt
+ } catch {
+ case e: Exception => {
+ assert(false, s"Failed to convert v field ${v} to integer for ${action} action. Exception:" + e)
+ 1
+ }
+ }
case ACTION_LIKE => likeParamArg.getOrElse {
assert(false, "Action type " + action + " should have been filtered out!")
1
@@ -183,10 +190,12 @@
.write(ratingsSink)
// Also store the selected items into DataFile for later model construction usage.
- items.mapTo(('iidx, 'itypes) -> ('iidx, 'itypes)) { fields: (String, List[String]) =>
- val (iidx, itypes) = fields
+ items.mapTo(('iidx, 'itypes, 'starttime, 'endtime) -> ('iidx, 'itypes, 'starttime, 'endtime)) { fields: (String, List[String], Long, Option[Long]) =>
+ val (iidx, itypes, starttime, endtime) = fields
- (iidx, itypes.mkString(",")) // NOTE: convert List[String] into comma-separated String
+ // NOTE: convert List[String] into comma-separated String
+ // NOTE: endtime is optional
+ (iidx, itypes.mkString(","), starttime, endtime.map(_.toString).getOrElse("PIO_NONE"))
}.write(selectedItemsSink)
/**
diff --git a/process/engines/itemrec/algorithms/hadoop/scalding/generic/src/main/scala/io/prediction/algorithms/scalding/itemrec/generic/ModelConstructor.scala b/process/engines/itemrec/algorithms/hadoop/scalding/generic/src/main/scala/io/prediction/algorithms/scalding/itemrec/generic/ModelConstructor.scala
index 2b2ffb1..a19a075 100644
--- a/process/engines/itemrec/algorithms/hadoop/scalding/generic/src/main/scala/io/prediction/algorithms/scalding/itemrec/generic/ModelConstructor.scala
+++ b/process/engines/itemrec/algorithms/hadoop/scalding/generic/src/main/scala/io/prediction/algorithms/scalding/itemrec/generic/ModelConstructor.scala
@@ -25,6 +25,7 @@
* --engineid: <int>
* --algoid: <int>
* --modelSet: <boolean> (true/false). flag to indicate which set
+ * --recommendationTime: <long> (eg. 9876543210). recommend items with starttime <= recommendationTime and endtime > recommendationTime
*
* Optionsl args:
* --dbHost: <string> (eg. "127.0.0.1")
@@ -62,6 +63,7 @@
val DEBUG_TEST = debugArg.contains("test") // test mode
val modelSetArg = args("modelSet").toBoolean
+ val recommendationTimeArg = args("recommendationTime").toLong
/**
* input
@@ -70,16 +72,43 @@
.mapTo((0, 1, 2) -> ('uid, 'iid, 'score)) { fields: (String, String, Double) => fields }
val items = Tsv(DataFile(hdfsRootArg, appidArg, engineidArg, algoidArg, evalidArg, "selectedItems.tsv")).read
- .mapTo((0, 1) -> ('iidx, 'itypes)) { fields: (String, String) =>
- val (iidx, itypes) = fields // itypes are comma-separated String
+ .mapTo((0, 1, 2, 3) -> ('iidx, 'itypes, 'starttime, 'endtime)) { fields: (String, String, Long, String) =>
+ val (iidx, itypes, starttime, endtime) = fields // itypes are comma-separated String
- (iidx, itypes.split(",").toList)
+ val endtimeOpt: Option[Long] = endtime match {
+ case "PIO_NONE" => None
+ case x: String => {
+ try {
+ Some(x.toLong)
+ } catch {
+ case e: Exception => {
+ assert(false, s"Failed to convert ${x} to Long. Exception: " + e)
+ Some(0)
+ }
+ }
+ }
+ }
+
+ (iidx, itypes.split(",").toList, starttime, endtimeOpt)
}
/**
* process & output
*/
val p = score.joinWithSmaller('iid -> 'iidx, items) // get items info for each iid
+ .filter('starttime, 'endtime) { fields: (Long, Option[Long]) =>
+ val (starttimeI, endtimeI) = fields
+
+ val keepThis: Boolean = (starttimeI, endtimeI) match {
+ case (start, None) => (recommendationTimeArg >= start)
+ case (start, Some(end)) => ((recommendationTimeArg >= start) && (recommendationTimeArg < end))
+ case _ => {
+ assert(false, s"Unexpected item starttime ${starttimeI} and endtime ${endtimeI}")
+ false
+ }
+ }
+ keepThis
+ }
.project('uid, 'iid, 'score, 'itypes)
.groupBy('uid) { _.sortBy('score).reverse.toList[(String, Double, List[String])](('iid, 'score, 'itypes) -> 'iidsList) }
diff --git a/process/engines/itemrec/algorithms/hadoop/scalding/generic/src/test/scala/io/prediction/algorithms/scalding/itemrec/generic/DataPreparatorTest.scala b/process/engines/itemrec/algorithms/hadoop/scalding/generic/src/test/scala/io/prediction/algorithms/scalding/itemrec/generic/DataPreparatorTest.scala
index c745e71..899b77b 100644
--- a/process/engines/itemrec/algorithms/hadoop/scalding/generic/src/test/scala/io/prediction/algorithms/scalding/itemrec/generic/DataPreparatorTest.scala
+++ b/process/engines/itemrec/algorithms/hadoop/scalding/generic/src/test/scala/io/prediction/algorithms/scalding/itemrec/generic/DataPreparatorTest.scala
@@ -16,9 +16,14 @@
//val ViewDetails = "viewDetails"
val Conversion = "conversion"
+ val appid = 2
+
def test(itypes: List[String], params: Map[String, String],
- items: List[(String, String)], u2iActions: List[(String, String, String, String, String)],
- ratings: List[(String, String, Int)], selectedItems: List[(String, String)]) = {
+ items: List[(String, String, String, String, String, String)], // id, itypes, appid, starttime, ct, endtime
+ u2iActions: List[(String, String, String, String, String)],
+ ratings: List[(String, String, Int)],
+ selectedItems: List[(String, String, String, String)] // id, itypes, starttime, endtime
+ ) = {
val dbType = "file"
val dbName = "testpath/"
@@ -32,7 +37,7 @@
//.arg("dbHost", dbHost.get)
//.arg("dbPort", dbPort.get.toString)
.arg("hdfsRoot", hdfsRoot)
- .arg("appid", "2")
+ .arg("appid", appid.toString)
.arg("engineid", "4")
.arg("algoid", "5")
.arg("itypes", itypes)
@@ -42,14 +47,14 @@
.arg("conversionParam", params("conversionParam"))
.arg("conflictParam", params("conflictParam"))
//.arg("debug", List("test")) // NOTE: test mode
- .source(Items(appId = 2, itypes = Some(itypes), dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, items)
- .source(U2iActions(appId = 2, dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, u2iActions)
+ .source(Items(appId = appid, itypes = Some(itypes), dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, items)
+ .source(U2iActions(appId = appid, dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, u2iActions)
.sink[(String, String, Int)](Tsv(DataFile(hdfsRoot, 2, 4, 5, None, "ratings.tsv"))) { outputBuffer =>
"correctly process and write data to ratings.tsv" in {
outputBuffer.toList must containTheSameElementsAs(ratings)
}
}
- .sink[(String, String)](Tsv(DataFile(hdfsRoot, 2, 4, 5, None, "selectedItems.tsv"))) { outputBuffer =>
+ .sink[(String, String, String, String)](Tsv(DataFile(hdfsRoot, 2, 4, 5, None, "selectedItems.tsv"))) { outputBuffer =>
"correctly write selectedItems.tsv" in {
outputBuffer.toList must containTheSameElementsAs(selectedItems)
}
@@ -61,8 +66,11 @@
/** no itypes specified */
def testWithoutItypes(params: Map[String, String],
- items: List[(String, String)], u2iActions: List[(String, String, String, String, String)],
- ratings: List[(String, String, Int)], selectedItems: List[(String, String)]) = {
+ items: List[(String, String, String, String, String, String)], // id, itypes, appid, starttime, ct, endtime
+ u2iActions: List[(String, String, String, String, String)],
+ ratings: List[(String, String, Int)],
+ selectedItems: List[(String, String, String, String)] // id, itypes, starttime, endtime
+ ) = {
val dbType = "file"
val dbName = "testpath/"
@@ -76,7 +84,7 @@
//.arg("dbHost", dbHost.get)
//.arg("dbPort", dbPort.get.toString)
.arg("hdfsRoot", hdfsRoot)
- .arg("appid", "2")
+ .arg("appid", appid.toString)
.arg("engineid", "4")
.arg("algoid", "5")
//.arg("itypes", itypes) // NOTE: no itypes args!
@@ -86,14 +94,14 @@
.arg("conversionParam", params("conversionParam"))
.arg("conflictParam", params("conflictParam"))
//.arg("debug", List("test")) // NOTE: test mode
- .source(Items(appId = 2, itypes = None, dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, items)
- .source(U2iActions(appId = 2, dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, u2iActions)
+ .source(Items(appId = appid, itypes = None, dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, items)
+ .source(U2iActions(appId = appid, dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, u2iActions)
.sink[(String, String, Int)](Tsv(DataFile(hdfsRoot, 2, 4, 5, None, "ratings.tsv"))) { outputBuffer =>
"correctly process and write data to ratings.tsv" in {
outputBuffer.toList must containTheSameElementsAs(ratings)
}
}
- .sink[(String, String)](Tsv(DataFile(hdfsRoot, 2, 4, 5, None, "selectedItems.tsv"))) { outputBuffer =>
+ .sink[(String, String, String, String)](Tsv(DataFile(hdfsRoot, 2, 4, 5, None, "selectedItems.tsv"))) { outputBuffer =>
"correctly write selectedItems.tsv" in {
outputBuffer.toList must containTheSameElementsAs(selectedItems)
}
@@ -103,11 +111,31 @@
}
+ val noEndtime = "PIO_NONE"
/**
* Test 1. basic. Rate actions only without conflicts
*/
val test1AllItypes = List("t1", "t2", "t3", "t4")
- val test1Items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test1ItemsMap = Map(
+ // id, itypes, appid, starttime, ct, endtime
+ "i0" -> ("i0", "t1,t2,t3", appid.toString, "12345", "12346", noEndtime),
+ "i1" -> ("i1", "t2,t3", appid.toString, "12347", "12348", noEndtime),
+ "i2" -> ("i2", "t4", appid.toString, "12349", "12350", noEndtime),
+ "i3" -> ("i3", "t3,t4", appid.toString, "12351", "12352", noEndtime))
+
+ val test1Items = List(
+ test1ItemsMap("i0"),
+ test1ItemsMap("i1"),
+ test1ItemsMap("i2"),
+ test1ItemsMap("i3"))
+
+ def genSelectedItems(items: List[(String, String, String, String, String, String)]) = {
+ items map { x =>
+ val (id, itypes, appid, starttime, ct, endtime) = x
+ (id, itypes, starttime, endtime)
+ }
+ }
+
val test1U2i = List(
(Rate, "u0", "i0", "123450", "3"),
(Rate, "u0", "i1", "123457", "1"),
@@ -128,18 +156,30 @@
"conflictParam" -> "latest")
"itemrec.generic DataPreparator with only rate actions, all itypes, no conflict" should {
- test(test1AllItypes, test1Params, test1Items, test1U2i, test1Ratings, test1Items)
+ test(test1AllItypes, test1Params, test1Items, test1U2i, test1Ratings, genSelectedItems(test1Items))
}
"itemrec.generic DataPreparator with only rate actions, no itypes specified, no conflict" should {
- testWithoutItypes(test1Params, test1Items, test1U2i, test1Ratings, test1Items)
+ testWithoutItypes(test1Params, test1Items, test1U2i, test1Ratings, genSelectedItems(test1Items))
}
/**
* Test 2. rate actions only with conflicts
*/
val test2AllItypes = List("t1", "t2", "t3", "t4")
- val test2Items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test2ItemsMap = Map(
+ // id, itypes, appid, starttime, ct, endtime
+ "i0" -> ("i0", "t1,t2,t3", appid.toString, "12345", "12346", noEndtime),
+ "i1" -> ("i1", "t2,t3", appid.toString, "12347", "12348", noEndtime),
+ "i2" -> ("i2", "t4", appid.toString, "12349", "12350", noEndtime),
+ "i3" -> ("i3", "t3,t4", appid.toString, "12351", "12352", noEndtime))
+
+ val test2Items = List(
+ test2ItemsMap("i0"),
+ test2ItemsMap("i1"),
+ test2ItemsMap("i2"),
+ test2ItemsMap("i3"))
+
val test2U2i = List(
(Rate, "u0", "i0", "123448", "3"),
(Rate, "u0", "i0", "123449", "4"), // highest
@@ -186,7 +226,10 @@
("u1", "i1", 3))
val test2Itypes_t1t4 = List("t1", "t4")
- val test2Items_t1t4 = List(("i0", "t1,t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test2Items_t1t4 = List(
+ test2ItemsMap("i0"),
+ test2ItemsMap("i2"),
+ test2ItemsMap("i3"))
val test2RatingsHighest_t1t4 = List(
("u0", "i0", 4),
("u0", "i2", 5),
@@ -199,33 +242,45 @@
val test2ParamsLowest = test2Params + ("conflictParam" -> "lowest")
"itemrec.generic DataPreparator with only rate actions, all itypes, conflict=latest" should {
- test(test2AllItypes, test2Params, test2Items, test2U2i, test2RatingsLatest, test2Items)
+ test(test2AllItypes, test2Params, test2Items, test2U2i, test2RatingsLatest, genSelectedItems(test2Items))
}
"itemrec.generic DataPreparator with only rate actions, all itypes, conflict=highest" should {
- test(test2AllItypes, test2ParamsHighest, test2Items, test2U2i, test2RatingsHighest, test2Items)
+ test(test2AllItypes, test2ParamsHighest, test2Items, test2U2i, test2RatingsHighest, genSelectedItems(test2Items))
}
"itemrec.generic DataPreparator with only rate actions, all itypes, conflict=lowest" should {
- test(test2AllItypes, test2ParamsLowest, test2Items, test2U2i, test2RatingsLowest, test2Items)
+ test(test2AllItypes, test2ParamsLowest, test2Items, test2U2i, test2RatingsLowest, genSelectedItems(test2Items))
}
"itemrec.generic DataPreparator with only rate actions, some itypes, conflict=highest" should {
- test(test2Itypes_t1t4, test2ParamsHighest, test2Items, test2U2i, test2RatingsHighest_t1t4, test2Items_t1t4)
+ test(test2Itypes_t1t4, test2ParamsHighest, test2Items, test2U2i, test2RatingsHighest_t1t4, genSelectedItems(test2Items_t1t4))
}
/**
* Test 3. Different Actions without conflicts
*/
val test3AllItypes = List("t1", "t2", "t3", "t4")
- val test3Items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test3ItemsMap = Map(
+ // id, itypes, appid, starttime, ct, endtime
+ "i0" -> ("i0", "t1,t2,t3", appid.toString, "12345", "12346", "56789"),
+ "i1" -> ("i1", "t2,t3", appid.toString, "12347", "12348", noEndtime),
+ "i2" -> ("i2", "t4", appid.toString, "12349", "12350", "56790"),
+ "i3" -> ("i3", "t3,t4", appid.toString, "12351", "12352", noEndtime))
+
+ val test3Items = List(
+ test3ItemsMap("i0"),
+ test3ItemsMap("i1"),
+ test3ItemsMap("i2"),
+ test3ItemsMap("i3"))
+
val test3U2i = List(
(Rate, "u0", "i0", "123450", "4"),
- (Like, "u0", "i1", "123457", "3"),
- (Dislike, "u0", "i2", "123458", "3"),
- (View, "u0", "i3", "123459", "0"), // NOTE: assume v field won't be missing
+ (Like, "u0", "i1", "123457", "PIO_NONE"),
+ (Dislike, "u0", "i2", "123458", "PIO_NONE"),
+ (View, "u0", "i3", "123459", "PIO_NONE"), // NOTE: assume v field won't be missing
(Rate, "u1", "i0", "123457", "2"),
- (Conversion, "u1", "i1", "123458", "0"))
+ (Conversion, "u1", "i1", "123458", "PIO_NONE"))
val test3Ratings = List(
("u0", "i0", 4),
@@ -239,7 +294,7 @@
"conflictParam" -> "latest")
"itemrec.generic DataPreparator with only all actions, all itypes, no conflict" should {
- test(test3AllItypes, test3Params, test3Items, test3U2i, test3Ratings, test3Items)
+ test(test3AllItypes, test3Params, test3Items, test3U2i, test3Ratings, genSelectedItems(test3Items))
}
/**
@@ -249,27 +304,39 @@
"conflictParam" -> "latest")
val test4AllItypes = List("t1", "t2", "t3", "t4")
- val test4Items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test4ItemsMap = Map(
+ // id, itypes, appid, starttime, ct, endtime
+ "i0" -> ("i0", "t1,t2,t3", appid.toString, "12345", "12346", "56789"),
+ "i1" -> ("i1", "t2,t3", appid.toString, "12347", "12348", noEndtime),
+ "i2" -> ("i2", "t4", appid.toString, "12349", "12350", "56790"),
+ "i3" -> ("i3", "t3,t4", appid.toString, "12351", "12352", noEndtime))
+
+ val test4Items = List(
+ test4ItemsMap("i0"),
+ test4ItemsMap("i1"),
+ test4ItemsMap("i2"),
+ test4ItemsMap("i3"))
+
val test4U2i = List(
(Rate, "u0", "i0", "123448", "3"),
- (View, "u0", "i0", "123449", "4"), // lowest (2)
- (Like, "u0", "i0", "123451", "0"), // latest, highest (5)
- (Conversion, "u0", "i0", "123450", "1"),
+ (View, "u0", "i0", "123449", "PIO_NONE"), // lowest (2)
+ (Like, "u0", "i0", "123451", "PIO_NONE"), // latest, highest (5)
+ (Conversion, "u0", "i0", "123450", "PIO_NONE"),
(Rate, "u0", "i1", "123456", "1"), // lowest
(Rate, "u0", "i1", "123457", "4"), // highest
- (View, "u0", "i1", "123458", "3"), // latest (2)
+ (View, "u0", "i1", "123458", "PIO_NONE"), // latest (2)
- (Conversion, "u0", "i2", "123461", "2"), // latest, highest (4)
+ (Conversion, "u0", "i2", "123461", "PIO_NONE"), // latest, highest (4)
(Rate, "u0", "i2", "123459", "3"),
- (View, "u0", "i2", "123460", "5"), // lowest
+ (View, "u0", "i2", "123460", "PIO_NONE"), // lowest
(Rate, "u0", "i3", "123459", "2"),
- (View, "u1", "i0", "123457", "5"), // (2)
+ (View, "u1", "i0", "123457", "PIO_NONE"), // (2)
(Rate, "u1", "i1", "123458", "5"), // highest
- (Conversion, "u1", "i1", "123459", "4"), // (4)
- (Dislike, "u1", "i1", "123460", "1")) // latest, lowest (1)
+ (Conversion, "u1", "i1", "123459", "PIO_NONE"), // (4)
+ (Dislike, "u1", "i1", "123460", "PIO_NONE")) // latest, lowest (1)
val test4RatingsLatest = List(
("u0", "i0", 5),
@@ -280,7 +347,7 @@
("u1", "i1", 1))
"itemrec.generic DataPreparator with all actions, all itypes, and conflicts=latest" should {
- test(test4AllItypes, test4Params, test4Items, test4U2i, test4RatingsLatest, test4Items)
+ test(test4AllItypes, test4Params, test4Items, test4U2i, test4RatingsLatest, genSelectedItems(test4Items))
}
val test4ParamsIgnoreView = test4Params + ("viewParam" -> "ignore")
@@ -293,7 +360,7 @@
("u1", "i1", 1))
"itemrec.generic DataPreparator with all actions, all itypes, ignore View actions and conflicts=latest" should {
- test(test4AllItypes, test4ParamsIgnoreView, test4Items, test4U2i, test4RatingsIgnoreViewLatest, test4Items)
+ test(test4AllItypes, test4ParamsIgnoreView, test4Items, test4U2i, test4RatingsIgnoreViewLatest, genSelectedItems(test4Items))
}
// note: currently rate action can't be ignored
@@ -308,7 +375,7 @@
("u1", "i1", 5))
"itemrec.generic DataPreparator with all actions, all itypes, ignore all actions except View (and Rate) and conflicts=latest" should {
- test(test4AllItypes, test4ParamsIgnoreAllExceptView, test4Items, test4U2i, test4RatingsIgnoreAllExceptViewLatest, test4Items)
+ test(test4AllItypes, test4ParamsIgnoreAllExceptView, test4Items, test4U2i, test4RatingsIgnoreAllExceptViewLatest, genSelectedItems(test4Items))
}
// note: meaning rate action only
@@ -322,13 +389,17 @@
("u1", "i1", 5))
"itemrec.generic DataPreparator with all actions, all itypes, ignore all actions (except Rate) and conflicts=latest" should {
- test(test4AllItypes, test4ParamsIgnoreAll, test4Items, test4U2i, test4RatingsIgnoreAllLatest, test4Items)
+ test(test4AllItypes, test4ParamsIgnoreAll, test4Items, test4U2i, test4RatingsIgnoreAllLatest, genSelectedItems(test4Items))
}
val test4ParamsLowest: Map[String, String] = test4Params + ("conflictParam" -> "lowest")
val test4Itypes_t3 = List("t3")
- val test4Items_t3 = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i3", "t3,t4"))
+ val test4Items_t3 = List(
+ test4ItemsMap("i0"),
+ test4ItemsMap("i1"),
+ test4ItemsMap("i3"))
+
val test4RatingsLowest_t3 = List(
("u0", "i0", 2),
("u0", "i1", 1),
@@ -337,7 +408,7 @@
("u1", "i1", 1))
"itemrec.generic DataPreparator with all actions, some itypes, and conflicts=lowest" should {
- test(test4Itypes_t3, test4ParamsLowest, test4Items, test4U2i, test4RatingsLowest_t3, test4Items_t3)
+ test(test4Itypes_t3, test4ParamsLowest, test4Items, test4U2i, test4RatingsLowest_t3, genSelectedItems(test4Items_t3))
}
}
diff --git a/process/engines/itemrec/algorithms/hadoop/scalding/generic/src/test/scala/io/prediction/algorithms/scalding/itemrec/generic/ModelConstructorTest.scala b/process/engines/itemrec/algorithms/hadoop/scalding/generic/src/test/scala/io/prediction/algorithms/scalding/itemrec/generic/ModelConstructorTest.scala
index 7f3246d..4aa74a7 100644
--- a/process/engines/itemrec/algorithms/hadoop/scalding/generic/src/test/scala/io/prediction/algorithms/scalding/itemrec/generic/ModelConstructorTest.scala
+++ b/process/engines/itemrec/algorithms/hadoop/scalding/generic/src/test/scala/io/prediction/algorithms/scalding/itemrec/generic/ModelConstructorTest.scala
@@ -8,14 +8,16 @@
import io.prediction.commons.scalding.modeldata.ItemRecScores
class ModelConstructorTest extends Specification with TupleConversions {
- "itemrec.generic ModelConstructor in test mode" should {
+
+ def test(recommendationTime: Long,
+ items: List[(String, String, String, String)], //iid, itypes, starttime, endtime
+ itemRecScores: List[(String, String, String)],
+ output: List[(String, String, String, String)]) = {
+
val appid = 3
val engineid = 4
val algoid = 7
val modelSet = true
- val test1ItemRecScores = List(("u0", "i1", "0.123"), ("u0", "i2", "0.456"), ("u1", "i0", "1.23"))
- val test1Items = List(("i0", "t1,t2,t3"), ("i1", "t1,t2"), ("i2", "t2,t3"))
- val test1Output = List(("u0", "i2,i1", "0.456,0.123", "[t2,t3],[t1,t2]", algoid, modelSet), ("u1", "i0", "1.23", "[t1,t2,t3]", algoid, modelSet))
val dbType = "file"
val dbName = "testpath/"
@@ -23,6 +25,8 @@
val dbPort = None
val hdfsRoot = "testroot/"
+ val outputItemRecScores = output map { case (uid, iid, score, itypes) => (uid, iid, score, itypes, algoid, modelSet) }
+
JobTest("io.prediction.algorithms.scalding.itemrec.generic.ModelConstructor")
.arg("dbType", dbType)
.arg("dbName", dbName)
@@ -31,15 +35,109 @@
.arg("engineid", engineid.toString)
.arg("algoid", algoid.toString)
.arg("modelSet", modelSet.toString)
+ .arg("recommendationTime", recommendationTime.toString)
//.arg("debug", "test") // NOTE: test mode
- .source(Tsv(AlgoFile(hdfsRoot, appid, engineid, algoid, None, "itemRecScores.tsv")), test1ItemRecScores)
- .source(Tsv(DataFile(hdfsRoot, appid, engineid, algoid, None, "selectedItems.tsv")), test1Items)
+ .source(Tsv(AlgoFile(hdfsRoot, appid, engineid, algoid, None, "itemRecScores.tsv")), itemRecScores)
+ .source(Tsv(DataFile(hdfsRoot, appid, engineid, algoid, None, "selectedItems.tsv")), items)
.sink[(String, String, String, String, Int, Boolean)](ItemRecScores(dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort, algoid = algoid, modelset = modelSet).getSource) { outputBuffer =>
"correctly write model data to a file" in {
- outputBuffer.toList must containTheSameElementsAs(test1Output)
+ outputBuffer.toList must containTheSameElementsAs(outputItemRecScores)
}
}
.run
.finish
}
+
+ val largeNumber = 1234567890 // larger than any item starttime
+ val noEndtime = "PIO_NONE"
+
+ /* test 1 */
+ val test1ItemRecScores = List(("u0", "i1", "0.123"), ("u0", "i2", "0.456"), ("u1", "i0", "1.23"))
+ val test1Items = List(
+ ("i0", "t1,t2,t3", "12346", noEndtime),
+ ("i1", "t1,t2", "12347", noEndtime),
+ ("i2", "t2,t3", "12348", noEndtime))
+
+ val test1Output = List(
+ ("u0", "i2,i1", "0.456,0.123", "[t2,t3],[t1,t2]"),
+ ("u1", "i0", "1.23", "[t1,t2,t3]"))
+
+ "itemrec.generic ModelConstructor" should {
+ test(largeNumber, test1Items, test1ItemRecScores, test1Output)
+ }
+
+ /* test 2: test starttime and endtime */
+ // starttime, endtime
+ // i0 A |---------|
+ // i1 B |---------|E
+ // i2 C|---------|
+ // i3 |---------|
+ // D F G
+
+ val tA = 123122
+ val tB = 123123
+ val tC = 123457
+ val tD = 123679
+ val tE = 543322
+ val tF = 543654
+ val tG = 543655
+
+ val test2ItemRecScores = List(
+ ("u0", "i1", "0.123"),
+ ("u0", "i2", "0.456"),
+ ("u0", "i3", "0.2"),
+ ("u1", "i0", "12"),
+ ("u1", "i2", "2"))
+
+ val test2Items = List(
+ ("i0", "t1,t2,t3", "123123", "543210"),
+ ("i1", "t1,t2", "123456", "543321"),
+ ("i2", "t2,t3", "123567", "543432"),
+ ("i3", "t2", "123678", "543654"))
+
+ val test2Output = List(
+ ("u0", "i2,i3,i1", "0.456,0.2,0.123", "[t2,t3],[t2],[t1,t2]"),
+ ("u1", "i0,i2", "12.0,2.0", "[t1,t2,t3],[t2,t3]"))
+
+ val test2OutputEmpty = List()
+
+ val test2Outputi0 = List(
+ ("u1", "i0", "12.0", "[t1,t2,t3]"))
+
+ val test2Outputi0i1 = List(
+ ("u0", "i1", "0.123", "[t1,t2]"),
+ ("u1", "i0", "12.0", "[t1,t2,t3]"))
+
+ val test2Outputi2i3 = List(
+ ("u0", "i2,i3", "0.456,0.2", "[t2,t3],[t2]"),
+ ("u1", "i2", "2.0", "[t2,t3]"))
+
+ "recommendationTime < all item starttime" should {
+ test(tA, test2Items, test2ItemRecScores, test2OutputEmpty)
+ }
+
+ "recommendationTime == earliest starttime" should {
+ test(tB, test2Items, test2ItemRecScores, test2Outputi0)
+ }
+
+ "recommendationTime > some items starttime" should {
+ test(tC, test2Items, test2ItemRecScores, test2Outputi0i1)
+ }
+
+ "recommendationTime > all item starttime and < all item endtime" should {
+ test(tD, test2Items, test2ItemRecScores, test2Output)
+ }
+
+ "recommendationTime > some item endtime" should {
+ test(tE, test2Items, test2ItemRecScores, test2Outputi2i3)
+ }
+
+ "recommendationTime == last item endtime" should {
+ test(tF, test2Items, test2ItemRecScores, test2OutputEmpty)
+ }
+
+ "recommendationTime > last item endtime" should {
+ test(tG, test2Items, test2ItemRecScores, test2OutputEmpty)
+ }
+
}
diff --git a/process/engines/itemrec/algorithms/hadoop/scalding/knnitembased/src/main/scala/io/prediction/algorithms/scalding/itemrec/knnitembased/DataPreparator.scala b/process/engines/itemrec/algorithms/hadoop/scalding/knnitembased/src/main/scala/io/prediction/algorithms/scalding/itemrec/knnitembased/DataPreparator.scala
index c1a6fd9..709ef42 100644
--- a/process/engines/itemrec/algorithms/hadoop/scalding/knnitembased/src/main/scala/io/prediction/algorithms/scalding/itemrec/knnitembased/DataPreparator.scala
+++ b/process/engines/itemrec/algorithms/hadoop/scalding/knnitembased/src/main/scala/io/prediction/algorithms/scalding/itemrec/knnitembased/DataPreparator.scala
@@ -114,7 +114,7 @@
// get items data
val items = Items(appId = trainingAppid, itypes = itypesArg,
- dbType = dbTypeArg, dbName = dbNameArg, dbHost = dbHostArg, dbPort = dbPortArg).readData('iidx, 'itypes)
+ dbType = dbTypeArg, dbName = dbNameArg, dbHost = dbHostArg, dbPort = dbPortArg).readStartEndtime('iidx, 'itypes, 'starttime, 'endtime)
val u2i = U2iActions(appId = trainingAppid,
dbType = dbTypeArg, dbName = dbNameArg, dbHost = dbHostArg, dbPort = dbPortArg).readData('action, 'uid, 'iid, 't, 'v)
@@ -132,7 +132,7 @@
* computation
*/
u2i.joinWithSmaller('iid -> 'iidx, items) // only select actions of these items
- .filter('action, 'v) { fields: (String, String) =>
+ .filter('action, 'v) { fields: (String, Option[String]) =>
val (action, v) = fields
val keepThis: Boolean = action match {
@@ -148,12 +148,19 @@
}
keepThis
}
- .map(('action, 'v, 't) -> ('rating, 'tLong)) { fields: (String, String, String) =>
+ .map(('action, 'v, 't) -> ('rating, 'tLong)) { fields: (String, Option[String], String) =>
val (action, v, t) = fields
// convert actions into rating value based on "action" and "v" fields
val rating: Int = action match {
- case ACTION_RATE => v.toInt
+ case ACTION_RATE => try {
+ v.get.toInt
+ } catch {
+ case e: Exception => {
+ assert(false, s"Failed to convert v field ${v} to integer for ${action} action. Exception:" + e)
+ 1
+ }
+ }
case ACTION_LIKE => likeParamArg.getOrElse {
assert(false, "Action type " + action + " should have been filtered out!")
1
@@ -183,10 +190,12 @@
.write(ratingsSink)
// Also store the selected items into DataFile for later model construction usage.
- items.mapTo(('iidx, 'itypes) -> ('iidx, 'itypes)) { fields: (String, List[String]) =>
- val (iidx, itypes) = fields
+ items.mapTo(('iidx, 'itypes, 'starttime, 'endtime) -> ('iidx, 'itypes, 'starttime, 'endtime)) { fields: (String, List[String], Long, Option[Long]) =>
+ val (iidx, itypes, starttime, endtime) = fields
- (iidx, itypes.mkString(",")) // NOTE: convert List[String] into comma-separated String
+ // NOTE: convert List[String] into comma-separated String
+ // NOTE: endtime is optional
+ (iidx, itypes.mkString(","), starttime, endtime.map(_.toString).getOrElse("PIO_NONE"))
}.write(selectedItemsSink)
/**
diff --git a/process/engines/itemrec/algorithms/hadoop/scalding/knnitembased/src/main/scala/io/prediction/algorithms/scalding/itemrec/knnitembased/ModelConstructor.scala b/process/engines/itemrec/algorithms/hadoop/scalding/knnitembased/src/main/scala/io/prediction/algorithms/scalding/itemrec/knnitembased/ModelConstructor.scala
index 5b0ac60..02e193f 100644
--- a/process/engines/itemrec/algorithms/hadoop/scalding/knnitembased/src/main/scala/io/prediction/algorithms/scalding/itemrec/knnitembased/ModelConstructor.scala
+++ b/process/engines/itemrec/algorithms/hadoop/scalding/knnitembased/src/main/scala/io/prediction/algorithms/scalding/itemrec/knnitembased/ModelConstructor.scala
@@ -25,6 +25,7 @@
* --engineid: <int>
* --algoid: <int>
* --modelSet: <boolean> (true/false). flag to indicate which set
+ * --recommendationTime: <long> (eg. 9876543210). recommend items with starttime <= recommendationTime and endtime > recommendationTime
*
* Optionsl args:
* --dbHost: <string> (eg. "127.0.0.1")
@@ -62,6 +63,7 @@
val DEBUG_TEST = debugArg.contains("test") // test mode
val modelSetArg = args("modelSet").toBoolean
+ val recommendationTimeArg = args("recommendationTime").toLong
/**
* input
@@ -70,16 +72,43 @@
.mapTo((0, 1, 2) -> ('uid, 'iid, 'score)) { fields: (String, String, Double) => fields }
val items = Tsv(DataFile(hdfsRootArg, appidArg, engineidArg, algoidArg, evalidArg, "selectedItems.tsv")).read
- .mapTo((0, 1) -> ('iidx, 'itypes)) { fields: (String, String) =>
- val (iidx, itypes) = fields // itypes are comma-separated String
+ .mapTo((0, 1, 2, 3) -> ('iidx, 'itypes, 'starttime, 'endtime)) { fields: (String, String, Long, String) =>
+ val (iidx, itypes, starttime, endtime) = fields // itypes are comma-separated String
- (iidx, itypes.split(",").toList)
+ val endtimeOpt: Option[Long] = endtime match {
+ case "PIO_NONE" => None
+ case x: String => {
+ try {
+ Some(x.toLong)
+ } catch {
+ case e: Exception => {
+ assert(false, s"Failed to convert ${x} to Long. Exception: " + e)
+ Some(0)
+ }
+ }
+ }
+ }
+
+ (iidx, itypes.split(",").toList, starttime, endtimeOpt)
}
/**
* process & output
*/
val p = score.joinWithSmaller('iid -> 'iidx, items) // get items info for each iid
+ .filter('starttime, 'endtime) { fields: (Long, Option[Long]) =>
+ val (starttimeI, endtimeI) = fields
+
+ val keepThis: Boolean = (starttimeI, endtimeI) match {
+ case (start, None) => (recommendationTimeArg >= start)
+ case (start, Some(end)) => ((recommendationTimeArg >= start) && (recommendationTimeArg < end))
+ case _ => {
+ assert(false, s"Unexpected item starttime ${starttimeI} and endtime ${endtimeI}")
+ false
+ }
+ }
+ keepThis
+ }
.project('uid, 'iid, 'score, 'itypes)
.groupBy('uid) { _.sortBy('score).reverse.toList[(String, Double, List[String])](('iid, 'score, 'itypes) -> 'iidsList) }
diff --git a/process/engines/itemrec/algorithms/hadoop/scalding/knnitembased/src/test/scala/io/prediction/algorithms/scalding/itemrec/knnitembased/DataPreparatorTest.scala b/process/engines/itemrec/algorithms/hadoop/scalding/knnitembased/src/test/scala/io/prediction/algorithms/scalding/itemrec/knnitembased/DataPreparatorTest.scala
index 8539a21..837eee0 100644
--- a/process/engines/itemrec/algorithms/hadoop/scalding/knnitembased/src/test/scala/io/prediction/algorithms/scalding/itemrec/knnitembased/DataPreparatorTest.scala
+++ b/process/engines/itemrec/algorithms/hadoop/scalding/knnitembased/src/test/scala/io/prediction/algorithms/scalding/itemrec/knnitembased/DataPreparatorTest.scala
@@ -16,9 +16,14 @@
//val ViewDetails = "viewDetails"
val Conversion = "conversion"
+ val appid = 2
+
def test(itypes: List[String], params: Map[String, String],
- items: List[(String, String)], u2iActions: List[(String, String, String, String, String)],
- ratings: List[(String, String, Int)], selectedItems: List[(String, String)]) = {
+ items: List[(String, String, String, String, String, String)], // id, itypes, appid, starttime, ct, endtime
+ u2iActions: List[(String, String, String, String, String)],
+ ratings: List[(String, String, Int)],
+ selectedItems: List[(String, String, String, String)] // id, itypes, starttime, endtime
+ ) = {
val dbType = "file"
val dbName = "testpath/"
@@ -32,7 +37,7 @@
//.arg("dbHost", dbHost.get)
//.arg("dbPort", dbPort.get.toString)
.arg("hdfsRoot", hdfsRoot)
- .arg("appid", "2")
+ .arg("appid", appid.toString)
.arg("engineid", "4")
.arg("algoid", "5")
.arg("itypes", itypes)
@@ -42,14 +47,14 @@
.arg("conversionParam", params("conversionParam"))
.arg("conflictParam", params("conflictParam"))
//.arg("debug", List("test")) // NOTE: test mode
- .source(Items(appId = 2, itypes = Some(itypes), dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, items)
- .source(U2iActions(appId = 2, dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, u2iActions)
+ .source(Items(appId = appid, itypes = Some(itypes), dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, items)
+ .source(U2iActions(appId = appid, dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, u2iActions)
.sink[(String, String, Int)](Tsv(DataFile(hdfsRoot, 2, 4, 5, None, "ratings.tsv"))) { outputBuffer =>
"correctly process and write data to ratings.tsv" in {
outputBuffer.toList must containTheSameElementsAs(ratings)
}
}
- .sink[(String, String)](Tsv(DataFile(hdfsRoot, 2, 4, 5, None, "selectedItems.tsv"))) { outputBuffer =>
+ .sink[(String, String, String, String)](Tsv(DataFile(hdfsRoot, 2, 4, 5, None, "selectedItems.tsv"))) { outputBuffer =>
"correctly write selectedItems.tsv" in {
outputBuffer.toList must containTheSameElementsAs(selectedItems)
}
@@ -61,8 +66,11 @@
/** no itypes specified */
def testWithoutItypes(params: Map[String, String],
- items: List[(String, String)], u2iActions: List[(String, String, String, String, String)],
- ratings: List[(String, String, Int)], selectedItems: List[(String, String)]) = {
+ items: List[(String, String, String, String, String, String)], // id, itypes, appid, starttime, ct, endtime
+ u2iActions: List[(String, String, String, String, String)],
+ ratings: List[(String, String, Int)],
+ selectedItems: List[(String, String, String, String)] // id, itypes, starttime, endtime
+ ) = {
val dbType = "file"
val dbName = "testpath/"
@@ -76,7 +84,7 @@
//.arg("dbHost", dbHost.get)
//.arg("dbPort", dbPort.get.toString)
.arg("hdfsRoot", hdfsRoot)
- .arg("appid", "2")
+ .arg("appid", appid.toString)
.arg("engineid", "4")
.arg("algoid", "5")
//.arg("itypes", itypes) // NOTE: no itypes args!
@@ -86,14 +94,14 @@
.arg("conversionParam", params("conversionParam"))
.arg("conflictParam", params("conflictParam"))
//.arg("debug", List("test")) // NOTE: test mode
- .source(Items(appId = 2, itypes = None, dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, items)
- .source(U2iActions(appId = 2, dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, u2iActions)
+ .source(Items(appId = appid, itypes = None, dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, items)
+ .source(U2iActions(appId = appid, dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, u2iActions)
.sink[(String, String, Int)](Tsv(DataFile(hdfsRoot, 2, 4, 5, None, "ratings.tsv"))) { outputBuffer =>
"correctly process and write data to ratings.tsv" in {
outputBuffer.toList must containTheSameElementsAs(ratings)
}
}
- .sink[(String, String)](Tsv(DataFile(hdfsRoot, 2, 4, 5, None, "selectedItems.tsv"))) { outputBuffer =>
+ .sink[(String, String, String, String)](Tsv(DataFile(hdfsRoot, 2, 4, 5, None, "selectedItems.tsv"))) { outputBuffer =>
"correctly write selectedItems.tsv" in {
outputBuffer.toList must containTheSameElementsAs(selectedItems)
}
@@ -103,11 +111,31 @@
}
+ val noEndtime = "PIO_NONE"
/**
* Test 1. basic. Rate actions only without conflicts
*/
val test1AllItypes = List("t1", "t2", "t3", "t4")
- val test1Items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test1ItemsMap = Map(
+ // id, itypes, appid, starttime, ct, endtime
+ "i0" -> ("i0", "t1,t2,t3", appid.toString, "12345", "12346", noEndtime),
+ "i1" -> ("i1", "t2,t3", appid.toString, "12347", "12348", noEndtime),
+ "i2" -> ("i2", "t4", appid.toString, "12349", "12350", noEndtime),
+ "i3" -> ("i3", "t3,t4", appid.toString, "12351", "12352", noEndtime))
+
+ val test1Items = List(
+ test1ItemsMap("i0"),
+ test1ItemsMap("i1"),
+ test1ItemsMap("i2"),
+ test1ItemsMap("i3"))
+
+ def genSelectedItems(items: List[(String, String, String, String, String, String)]) = {
+ items map { x =>
+ val (id, itypes, appid, starttime, ct, endtime) = x
+ (id, itypes, starttime, endtime)
+ }
+ }
+
val test1U2i = List(
(Rate, "u0", "i0", "123450", "3"),
(Rate, "u0", "i1", "123457", "1"),
@@ -128,18 +156,30 @@
"conflictParam" -> "latest")
"itemrec.knnitembased DataPreparator with only rate actions, all itypes, no conflict" should {
- test(test1AllItypes, test1Params, test1Items, test1U2i, test1Ratings, test1Items)
+ test(test1AllItypes, test1Params, test1Items, test1U2i, test1Ratings, genSelectedItems(test1Items))
}
"itemrec.knnitembased DataPreparator with only rate actions, no itypes specified, no conflict" should {
- testWithoutItypes(test1Params, test1Items, test1U2i, test1Ratings, test1Items)
+ testWithoutItypes(test1Params, test1Items, test1U2i, test1Ratings, genSelectedItems(test1Items))
}
/**
* Test 2. rate actions only with conflicts
*/
val test2AllItypes = List("t1", "t2", "t3", "t4")
- val test2Items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test2ItemsMap = Map(
+ // id, itypes, appid, starttime, ct, endtime
+ "i0" -> ("i0", "t1,t2,t3", appid.toString, "12345", "12346", noEndtime),
+ "i1" -> ("i1", "t2,t3", appid.toString, "12347", "12348", noEndtime),
+ "i2" -> ("i2", "t4", appid.toString, "12349", "12350", noEndtime),
+ "i3" -> ("i3", "t3,t4", appid.toString, "12351", "12352", noEndtime))
+
+ val test2Items = List(
+ test2ItemsMap("i0"),
+ test2ItemsMap("i1"),
+ test2ItemsMap("i2"),
+ test2ItemsMap("i3"))
+
val test2U2i = List(
(Rate, "u0", "i0", "123448", "3"),
(Rate, "u0", "i0", "123449", "4"), // highest
@@ -186,7 +226,10 @@
("u1", "i1", 3))
val test2Itypes_t1t4 = List("t1", "t4")
- val test2Items_t1t4 = List(("i0", "t1,t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test2Items_t1t4 = List(
+ test2ItemsMap("i0"),
+ test2ItemsMap("i2"),
+ test2ItemsMap("i3"))
val test2RatingsHighest_t1t4 = List(
("u0", "i0", 4),
("u0", "i2", 5),
@@ -199,33 +242,45 @@
val test2ParamsLowest = test2Params + ("conflictParam" -> "lowest")
"itemrec.knnitembased DataPreparator with only rate actions, all itypes, conflict=latest" should {
- test(test2AllItypes, test2Params, test2Items, test2U2i, test2RatingsLatest, test2Items)
+ test(test2AllItypes, test2Params, test2Items, test2U2i, test2RatingsLatest, genSelectedItems(test2Items))
}
"itemrec.knnitembased DataPreparator with only rate actions, all itypes, conflict=highest" should {
- test(test2AllItypes, test2ParamsHighest, test2Items, test2U2i, test2RatingsHighest, test2Items)
+ test(test2AllItypes, test2ParamsHighest, test2Items, test2U2i, test2RatingsHighest, genSelectedItems(test2Items))
}
"itemrec.knnitembased DataPreparator with only rate actions, all itypes, conflict=lowest" should {
- test(test2AllItypes, test2ParamsLowest, test2Items, test2U2i, test2RatingsLowest, test2Items)
+ test(test2AllItypes, test2ParamsLowest, test2Items, test2U2i, test2RatingsLowest, genSelectedItems(test2Items))
}
"itemrec.knnitembased DataPreparator with only rate actions, some itypes, conflict=highest" should {
- test(test2Itypes_t1t4, test2ParamsHighest, test2Items, test2U2i, test2RatingsHighest_t1t4, test2Items_t1t4)
+ test(test2Itypes_t1t4, test2ParamsHighest, test2Items, test2U2i, test2RatingsHighest_t1t4, genSelectedItems(test2Items_t1t4))
}
/**
* Test 3. Different Actions without conflicts
*/
val test3AllItypes = List("t1", "t2", "t3", "t4")
- val test3Items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test3ItemsMap = Map(
+ // id, itypes, appid, starttime, ct, endtime
+ "i0" -> ("i0", "t1,t2,t3", appid.toString, "12345", "12346", "56789"),
+ "i1" -> ("i1", "t2,t3", appid.toString, "12347", "12348", noEndtime),
+ "i2" -> ("i2", "t4", appid.toString, "12349", "12350", "56790"),
+ "i3" -> ("i3", "t3,t4", appid.toString, "12351", "12352", noEndtime))
+
+ val test3Items = List(
+ test3ItemsMap("i0"),
+ test3ItemsMap("i1"),
+ test3ItemsMap("i2"),
+ test3ItemsMap("i3"))
+
val test3U2i = List(
(Rate, "u0", "i0", "123450", "4"),
- (Like, "u0", "i1", "123457", "3"),
- (Dislike, "u0", "i2", "123458", "3"),
- (View, "u0", "i3", "123459", "0"), // NOTE: assume v field won't be missing
+ (Like, "u0", "i1", "123457", "PIO_NONE"),
+ (Dislike, "u0", "i2", "123458", "PIO_NONE"),
+ (View, "u0", "i3", "123459", "PIO_NONE"), // NOTE: assume v field won't be missing
(Rate, "u1", "i0", "123457", "2"),
- (Conversion, "u1", "i1", "123458", "0"))
+ (Conversion, "u1", "i1", "123458", "PIO_NONE"))
val test3Ratings = List(
("u0", "i0", 4),
@@ -239,7 +294,7 @@
"conflictParam" -> "latest")
"itemrec.knnitembased DataPreparator with only all actions, all itypes, no conflict" should {
- test(test3AllItypes, test3Params, test3Items, test3U2i, test3Ratings, test3Items)
+ test(test3AllItypes, test3Params, test3Items, test3U2i, test3Ratings, genSelectedItems(test3Items))
}
/**
@@ -249,27 +304,39 @@
"conflictParam" -> "latest")
val test4AllItypes = List("t1", "t2", "t3", "t4")
- val test4Items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test4ItemsMap = Map(
+ // id, itypes, appid, starttime, ct, endtime
+ "i0" -> ("i0", "t1,t2,t3", appid.toString, "12345", "12346", "56789"),
+ "i1" -> ("i1", "t2,t3", appid.toString, "12347", "12348", noEndtime),
+ "i2" -> ("i2", "t4", appid.toString, "12349", "12350", "56790"),
+ "i3" -> ("i3", "t3,t4", appid.toString, "12351", "12352", noEndtime))
+
+ val test4Items = List(
+ test4ItemsMap("i0"),
+ test4ItemsMap("i1"),
+ test4ItemsMap("i2"),
+ test4ItemsMap("i3"))
+
val test4U2i = List(
(Rate, "u0", "i0", "123448", "3"),
- (View, "u0", "i0", "123449", "4"), // lowest (2)
- (Like, "u0", "i0", "123451", "0"), // latest, highest (5)
- (Conversion, "u0", "i0", "123450", "1"),
+ (View, "u0", "i0", "123449", "PIO_NONE"), // lowest (2)
+ (Like, "u0", "i0", "123451", "PIO_NONE"), // latest, highest (5)
+ (Conversion, "u0", "i0", "123450", "PIO_NONE"),
(Rate, "u0", "i1", "123456", "1"), // lowest
(Rate, "u0", "i1", "123457", "4"), // highest
- (View, "u0", "i1", "123458", "3"), // latest (2)
+ (View, "u0", "i1", "123458", "PIO_NONE"), // latest (2)
- (Conversion, "u0", "i2", "123461", "2"), // latest, highest (4)
+ (Conversion, "u0", "i2", "123461", "PIO_NONE"), // latest, highest (4)
(Rate, "u0", "i2", "123459", "3"),
- (View, "u0", "i2", "123460", "5"), // lowest
+ (View, "u0", "i2", "123460", "PIO_NONE"), // lowest
(Rate, "u0", "i3", "123459", "2"),
- (View, "u1", "i0", "123457", "5"), // (2)
+ (View, "u1", "i0", "123457", "PIO_NONE"), // (2)
(Rate, "u1", "i1", "123458", "5"), // highest
- (Conversion, "u1", "i1", "123459", "4"), // (4)
- (Dislike, "u1", "i1", "123460", "1")) // latest, lowest (1)
+ (Conversion, "u1", "i1", "123459", "PIO_NONE"), // (4)
+ (Dislike, "u1", "i1", "123460", "PIO_NONE")) // latest, lowest (1)
val test4RatingsLatest = List(
("u0", "i0", 5),
@@ -280,7 +347,7 @@
("u1", "i1", 1))
"itemrec.knnitembased DataPreparator with all actions, all itypes, and conflicts=latest" should {
- test(test4AllItypes, test4Params, test4Items, test4U2i, test4RatingsLatest, test4Items)
+ test(test4AllItypes, test4Params, test4Items, test4U2i, test4RatingsLatest, genSelectedItems(test4Items))
}
val test4ParamsIgnoreView = test4Params + ("viewParam" -> "ignore")
@@ -293,7 +360,7 @@
("u1", "i1", 1))
"itemrec.knnitembased DataPreparator with all actions, all itypes, ignore View actions and conflicts=latest" should {
- test(test4AllItypes, test4ParamsIgnoreView, test4Items, test4U2i, test4RatingsIgnoreViewLatest, test4Items)
+ test(test4AllItypes, test4ParamsIgnoreView, test4Items, test4U2i, test4RatingsIgnoreViewLatest, genSelectedItems(test4Items))
}
// note: currently rate action can't be ignored
@@ -308,7 +375,7 @@
("u1", "i1", 5))
"itemrec.knnitembased DataPreparator with all actions, all itypes, ignore all actions except View (and Rate) and conflicts=latest" should {
- test(test4AllItypes, test4ParamsIgnoreAllExceptView, test4Items, test4U2i, test4RatingsIgnoreAllExceptViewLatest, test4Items)
+ test(test4AllItypes, test4ParamsIgnoreAllExceptView, test4Items, test4U2i, test4RatingsIgnoreAllExceptViewLatest, genSelectedItems(test4Items))
}
// note: meaning rate action only
@@ -322,13 +389,17 @@
("u1", "i1", 5))
"itemrec.knnitembased DataPreparator with all actions, all itypes, ignore all actions (except Rate) and conflicts=latest" should {
- test(test4AllItypes, test4ParamsIgnoreAll, test4Items, test4U2i, test4RatingsIgnoreAllLatest, test4Items)
+ test(test4AllItypes, test4ParamsIgnoreAll, test4Items, test4U2i, test4RatingsIgnoreAllLatest, genSelectedItems(test4Items))
}
val test4ParamsLowest: Map[String, String] = test4Params + ("conflictParam" -> "lowest")
val test4Itypes_t3 = List("t3")
- val test4Items_t3 = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i3", "t3,t4"))
+ val test4Items_t3 = List(
+ test4ItemsMap("i0"),
+ test4ItemsMap("i1"),
+ test4ItemsMap("i3"))
+
val test4RatingsLowest_t3 = List(
("u0", "i0", 2),
("u0", "i1", 1),
@@ -337,7 +408,7 @@
("u1", "i1", 1))
"itemrec.knnitembased DataPreparator with all actions, some itypes, and conflicts=lowest" should {
- test(test4Itypes_t3, test4ParamsLowest, test4Items, test4U2i, test4RatingsLowest_t3, test4Items_t3)
+ test(test4Itypes_t3, test4ParamsLowest, test4Items, test4U2i, test4RatingsLowest_t3, genSelectedItems(test4Items_t3))
}
}
diff --git a/process/engines/itemrec/algorithms/hadoop/scalding/knnitembased/src/test/scala/io/prediction/algorithms/scalding/itemrec/knnitembased/ModelConstructorTest.scala b/process/engines/itemrec/algorithms/hadoop/scalding/knnitembased/src/test/scala/io/prediction/algorithms/scalding/itemrec/knnitembased/ModelConstructorTest.scala
index 32dd1db..7934e00 100644
--- a/process/engines/itemrec/algorithms/hadoop/scalding/knnitembased/src/test/scala/io/prediction/algorithms/scalding/itemrec/knnitembased/ModelConstructorTest.scala
+++ b/process/engines/itemrec/algorithms/hadoop/scalding/knnitembased/src/test/scala/io/prediction/algorithms/scalding/itemrec/knnitembased/ModelConstructorTest.scala
@@ -8,14 +8,16 @@
import io.prediction.commons.scalding.modeldata.ItemRecScores
class ModelConstructorTest extends Specification with TupleConversions {
- "itemrec.knnitembased ModelConstructor in test mode" should {
+
+ def test(recommendationTime: Long,
+ items: List[(String, String, String, String)], //iid, itypes, starttime, endtime
+ itemRecScores: List[(String, String, String)],
+ output: List[(String, String, String, String)]) = {
+
val appid = 3
val engineid = 4
val algoid = 7
val modelSet = true
- val test1ItemRecScores = List(("u0", "i1", "0.123"), ("u0", "i2", "0.456"), ("u1", "i0", "1.23"))
- val test1Items = List(("i0", "t1,t2,t3"), ("i1", "t1,t2"), ("i2", "t2,t3"))
- val test1Output = List(("u0", "i2,i1", "0.456,0.123", "[t2,t3],[t1,t2]", algoid, modelSet), ("u1", "i0", "1.23", "[t1,t2,t3]", algoid, modelSet))
val dbType = "file"
val dbName = "testpath/"
@@ -23,6 +25,8 @@
val dbPort = None
val hdfsRoot = "testroot/"
+ val outputItemRecScores = output map { case (uid, iid, score, itypes) => (uid, iid, score, itypes, algoid, modelSet) }
+
JobTest("io.prediction.algorithms.scalding.itemrec.knnitembased.ModelConstructor")
.arg("dbType", dbType)
.arg("dbName", dbName)
@@ -31,15 +35,109 @@
.arg("engineid", engineid.toString)
.arg("algoid", algoid.toString)
.arg("modelSet", modelSet.toString)
+ .arg("recommendationTime", recommendationTime.toString)
//.arg("debug", "test") // NOTE: test mode
- .source(Tsv(AlgoFile(hdfsRoot, appid, engineid, algoid, None, "itemRecScores.tsv")), test1ItemRecScores)
- .source(Tsv(DataFile(hdfsRoot, appid, engineid, algoid, None, "selectedItems.tsv")), test1Items)
+ .source(Tsv(AlgoFile(hdfsRoot, appid, engineid, algoid, None, "itemRecScores.tsv")), itemRecScores)
+ .source(Tsv(DataFile(hdfsRoot, appid, engineid, algoid, None, "selectedItems.tsv")), items)
.sink[(String, String, String, String, Int, Boolean)](ItemRecScores(dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort, algoid = algoid, modelset = modelSet).getSource) { outputBuffer =>
"correctly write model data to a file" in {
- outputBuffer.toList must containTheSameElementsAs(test1Output)
+ outputBuffer.toList must containTheSameElementsAs(outputItemRecScores)
}
}
.run
.finish
}
+
+ val largeNumber = 1234567890 // larger than any item starttime
+ val noEndtime = "PIO_NONE"
+
+ /* test 1 */
+ val test1ItemRecScores = List(("u0", "i1", "0.123"), ("u0", "i2", "0.456"), ("u1", "i0", "1.23"))
+ val test1Items = List(
+ ("i0", "t1,t2,t3", "12346", noEndtime),
+ ("i1", "t1,t2", "12347", noEndtime),
+ ("i2", "t2,t3", "12348", noEndtime))
+
+ val test1Output = List(
+ ("u0", "i2,i1", "0.456,0.123", "[t2,t3],[t1,t2]"),
+ ("u1", "i0", "1.23", "[t1,t2,t3]"))
+
+ "itemrec.knnitembased ModelConstructor" should {
+ test(largeNumber, test1Items, test1ItemRecScores, test1Output)
+ }
+
+ /* test 2: test starttime and endtime */
+ // starttime, endtime
+ // i0 A |---------|
+ // i1 B |---------|E
+ // i2 C|---------|
+ // i3 |---------|
+ // D F G
+
+ val tA = 123122
+ val tB = 123123
+ val tC = 123457
+ val tD = 123679
+ val tE = 543322
+ val tF = 543654
+ val tG = 543655
+
+ val test2ItemRecScores = List(
+ ("u0", "i1", "0.123"),
+ ("u0", "i2", "0.456"),
+ ("u0", "i3", "0.2"),
+ ("u1", "i0", "12"),
+ ("u1", "i2", "2"))
+
+ val test2Items = List(
+ ("i0", "t1,t2,t3", "123123", "543210"),
+ ("i1", "t1,t2", "123456", "543321"),
+ ("i2", "t2,t3", "123567", "543432"),
+ ("i3", "t2", "123678", "543654"))
+
+ val test2Output = List(
+ ("u0", "i2,i3,i1", "0.456,0.2,0.123", "[t2,t3],[t2],[t1,t2]"),
+ ("u1", "i0,i2", "12.0,2.0", "[t1,t2,t3],[t2,t3]"))
+
+ val test2OutputEmpty = List()
+
+ val test2Outputi0 = List(
+ ("u1", "i0", "12.0", "[t1,t2,t3]"))
+
+ val test2Outputi0i1 = List(
+ ("u0", "i1", "0.123", "[t1,t2]"),
+ ("u1", "i0", "12.0", "[t1,t2,t3]"))
+
+ val test2Outputi2i3 = List(
+ ("u0", "i2,i3", "0.456,0.2", "[t2,t3],[t2]"),
+ ("u1", "i2", "2.0", "[t2,t3]"))
+
+ "recommendationTime < all item starttime" should {
+ test(tA, test2Items, test2ItemRecScores, test2OutputEmpty)
+ }
+
+ "recommendationTime == earliest starttime" should {
+ test(tB, test2Items, test2ItemRecScores, test2Outputi0)
+ }
+
+ "recommendationTime > some items starttime" should {
+ test(tC, test2Items, test2ItemRecScores, test2Outputi0i1)
+ }
+
+ "recommendationTime > all item starttime and < all item endtime" should {
+ test(tD, test2Items, test2ItemRecScores, test2Output)
+ }
+
+ "recommendationTime > some item endtime" should {
+ test(tE, test2Items, test2ItemRecScores, test2Outputi2i3)
+ }
+
+ "recommendationTime == last item endtime" should {
+ test(tF, test2Items, test2ItemRecScores, test2OutputEmpty)
+ }
+
+ "recommendationTime > last item endtime" should {
+ test(tG, test2Items, test2ItemRecScores, test2OutputEmpty)
+ }
+
}
diff --git a/process/engines/itemrec/algorithms/hadoop/scalding/latestrank/src/main/scala/io/prediction/algorithms/scalding/itemrec/latestrank/LatestRank.scala b/process/engines/itemrec/algorithms/hadoop/scalding/latestrank/src/main/scala/io/prediction/algorithms/scalding/itemrec/latestrank/LatestRank.scala
index 9a7171a..df3d7fc 100644
--- a/process/engines/itemrec/algorithms/hadoop/scalding/latestrank/src/main/scala/io/prediction/algorithms/scalding/itemrec/latestrank/LatestRank.scala
+++ b/process/engines/itemrec/algorithms/hadoop/scalding/latestrank/src/main/scala/io/prediction/algorithms/scalding/itemrec/latestrank/LatestRank.scala
@@ -33,8 +33,8 @@
*
* --itypes: <string separated by white space>. optional. eg "--itypes type1 type2". If no --itypes specified, then ALL itypes will be used.
* --numRecommendations: <int>. number of recommendations to be generated
- *
* --modelSet: <boolean> (true/false). flag to indicate which set
+ * --recommendationTime: <long> (eg. 9876543210). recommend items with starttime <= recommendationTime and endtime > recommendationTime
*
* Example:
* hadoop jar PredictionIO-Process-Hadoop-Scala-assembly-0.1.jar io.prediction.algorithms.scalding.itemrec.latestrank.LatestRank --hdfs --training_dbType mongodb --training_dbName predictionio_appdata --training_dbHost localhost --training_dbPort 27017 --modeldata_dbType mongodb --modeldata_dbName predictionio_modeldata --modeldata_dbHost localhost --modeldata_dbPort 27017 --hdfsRoot predictionio/ --appid 1 --engineid 1 --algoid 18 --modelSet true
@@ -68,6 +68,7 @@
val numRecommendationsArg = args("numRecommendations").toInt
val modelSetArg = args("modelSet").toBoolean
+ val recommendationTimeArg = args("recommendationTime").toLong
/**
* source
@@ -79,7 +80,7 @@
// get items data
val items = Items(appId = trainingAppid, itypes = itypesArg,
- dbType = training_dbTypeArg, dbName = training_dbNameArg, dbHost = training_dbHostArg, dbPort = training_dbPortArg).readStarttime('iidx, 'itypes, 'starttime)
+ dbType = training_dbTypeArg, dbName = training_dbNameArg, dbHost = training_dbHostArg, dbPort = training_dbPortArg).readStartEndtime('iidx, 'itypes, 'starttime, 'endtime)
val users = Users(appId = trainingAppid,
dbType = training_dbTypeArg, dbName = training_dbNameArg, dbHost = training_dbHostArg, dbPort = training_dbPortArg).readData('uid)
@@ -89,16 +90,29 @@
*/
val itemRecScores = ItemRecScores(dbType = modeldata_dbTypeArg, dbName = modeldata_dbNameArg, dbHost = modeldata_dbHostArg, dbPort = modeldata_dbPortArg, algoid = algoidArg, modelset = modelSetArg)
- val scoresFile = Tsv(AlgoFile(hdfsRootArg, appidArg, engineidArg, algoidArg, evalidArg, "itemRecScores.tsv"))
-
/**
* computation
*/
- val itemsWithKey = items.map(() -> 'itemKey) { u: Unit => 1 }
+ val itemsWithKey = items
+ .filter('starttime, 'endtime) { fields: (Long, Option[Long]) =>
+ // only keep items with valid starttime and endtime
+ val (starttimeI, endtimeI) = fields
+
+ val keepThis: Boolean = (starttimeI, endtimeI) match {
+ case (start, None) => (recommendationTimeArg >= start)
+ case (start, Some(end)) => ((recommendationTimeArg >= start) && (recommendationTimeArg < end))
+ case _ => {
+ assert(false, s"Unexpected item starttime ${starttimeI} and endtime ${endtimeI}")
+ false
+ }
+ }
+ keepThis
+ }
+ .map(() -> 'itemKey) { u: Unit => 1 }
val usersWithKey = users.map(() -> 'userKey) { u: Unit => 1 }
val scores = usersWithKey.joinWithSmaller('userKey -> 'itemKey, itemsWithKey)
- .map('starttime -> 'score) { t: String => t.toDouble }
+ .map('starttime -> 'score) { t: Long => t.toDouble }
.project('uid, 'iidx, 'score, 'itypes)
.groupBy('uid) { _.sortBy('score).reverse.take(numRecommendationsArg) }
// another way to is to do toList then take top n from List. But then it would create an unncessary long List
diff --git a/process/engines/itemrec/algorithms/hadoop/scalding/latestrank/src/test/scala/io/prediction/algorithms/scalding/itemrec/latestrank/LatestRankTest.scala b/process/engines/itemrec/algorithms/hadoop/scalding/latestrank/src/test/scala/io/prediction/algorithms/scalding/itemrec/latestrank/LatestRankTest.scala
index d1fc1cd..f5e0451 100644
--- a/process/engines/itemrec/algorithms/hadoop/scalding/latestrank/src/test/scala/io/prediction/algorithms/scalding/itemrec/latestrank/LatestRankTest.scala
+++ b/process/engines/itemrec/algorithms/hadoop/scalding/latestrank/src/test/scala/io/prediction/algorithms/scalding/itemrec/latestrank/LatestRankTest.scala
@@ -13,7 +13,8 @@
def test(algoid: Int, modelSet: Boolean,
itypes: List[String],
numRecommendations: Int,
- items: List[(String, String, String, String)],
+ recommendationTime: Long,
+ items: List[(String, String, String, String, String, String)], // id, itypes, appid, starttime, ct, endtime
users: List[(String, String)],
itemRecScores: List[(String, String, String, String, Int, Boolean)]) = {
@@ -41,6 +42,7 @@
.arg("itypes", itypes)
.arg("numRecommendations", numRecommendations.toString)
.arg("modelSet", modelSet.toString)
+ .arg("recommendationTime", recommendationTime.toString)
.source(Items(appId = appid, itypes = Some(itypes),
dbType = training_dbType, dbName = training_dbName, dbHost = None, dbPort = None).getSource, items)
.source(Users(appId = appid,
@@ -64,46 +66,136 @@
.finish
}
- val algoid = 12
- val modelSet = false
- val itypesT1T2 = List("t1", "t2")
- val itypesAll = List("t1", "t2", "t3", "t4")
- val items = List(("i0", "t1,t2,t3", "19", "123456"), ("i1", "t2,t3", "19", "123457"), ("i2", "t4", "19", "21"), ("i3", "t3,t4", "19", "9876543210"))
- val users = List(("u0", "3"), ("u1", "3"), ("u2", "3"), ("u3", "3"))
- val itemRecScoresT1T2 = List(
- ("u0", "i1,i0", "123457.0,123456.0", "[t2,t3],[t1,t2,t3]", algoid, modelSet),
- ("u1", "i1,i0", "123457.0,123456.0", "[t2,t3],[t1,t2,t3]", algoid, modelSet),
- ("u2", "i1,i0", "123457.0,123456.0", "[t2,t3],[t1,t2,t3]", algoid, modelSet),
- ("u3", "i1,i0", "123457.0,123456.0", "[t2,t3],[t1,t2,t3]", algoid, modelSet))
+ val largeNumber: Long = scala.Long.MaxValue // larger than any item starttime
+ val noEndtime = "PIO_NONE"
- val itemRecScoresAll = List(
- ("u0", "i3,i1,i0,i2", "9876543210.0,123457.0,123456.0,21.0", "[t3,t4],[t2,t3],[t1,t2,t3],[t4]", algoid, modelSet),
- ("u1", "i3,i1,i0,i2", "9876543210.0,123457.0,123456.0,21.0", "[t3,t4],[t2,t3],[t1,t2,t3],[t4]", algoid, modelSet),
- ("u2", "i3,i1,i0,i2", "9876543210.0,123457.0,123456.0,21.0", "[t3,t4],[t2,t3],[t1,t2,t3],[t4]", algoid, modelSet),
- ("u3", "i3,i1,i0,i2", "9876543210.0,123457.0,123456.0,21.0", "[t3,t4],[t2,t3],[t1,t2,t3],[t4]", algoid, modelSet))
+ /* test 1 */
+ val test1Algoid = 12
+ val test1ModelSet = false
+ val test1ItypesT1T2 = List("t1", "t2")
+ val test1ItypesAll = List("t1", "t2", "t3", "t4")
+ val test1Items = List(
+ ("i0", "t1,t2,t3", "19", "123456", "345678", noEndtime),
+ ("i1", "t2,t3", "19", "123457", "567890", noEndtime),
+ ("i2", "t4", "19", "21", "88", noEndtime),
+ ("i3", "t3,t4", "19", "9876543210", "67890", noEndtime))
+ val test1Users = List(("u0", "3"), ("u1", "3"), ("u2", "3"), ("u3", "3"))
+ val test1ItemRecScoresT1T2 = List(
+ ("u0", "i1,i0", "123457.0,123456.0", "[t2,t3],[t1,t2,t3]", test1Algoid, test1ModelSet),
+ ("u1", "i1,i0", "123457.0,123456.0", "[t2,t3],[t1,t2,t3]", test1Algoid, test1ModelSet),
+ ("u2", "i1,i0", "123457.0,123456.0", "[t2,t3],[t1,t2,t3]", test1Algoid, test1ModelSet),
+ ("u3", "i1,i0", "123457.0,123456.0", "[t2,t3],[t1,t2,t3]", test1Algoid, test1ModelSet))
- val itemRecScoresAllTop2 = List(
- ("u0", "i3,i1", "9876543210.0,123457.0", "[t3,t4],[t2,t3]", algoid, modelSet),
- ("u1", "i3,i1", "9876543210.0,123457.0", "[t3,t4],[t2,t3]", algoid, modelSet),
- ("u2", "i3,i1", "9876543210.0,123457.0", "[t3,t4],[t2,t3]", algoid, modelSet),
- ("u3", "i3,i1", "9876543210.0,123457.0", "[t3,t4],[t2,t3]", algoid, modelSet))
+ val test1ItemRecScoresAll = List(
+ ("u0", "i3,i1,i0,i2", "9876543210.0,123457.0,123456.0,21.0", "[t3,t4],[t2,t3],[t1,t2,t3],[t4]", test1Algoid, test1ModelSet),
+ ("u1", "i3,i1,i0,i2", "9876543210.0,123457.0,123456.0,21.0", "[t3,t4],[t2,t3],[t1,t2,t3],[t4]", test1Algoid, test1ModelSet),
+ ("u2", "i3,i1,i0,i2", "9876543210.0,123457.0,123456.0,21.0", "[t3,t4],[t2,t3],[t1,t2,t3],[t4]", test1Algoid, test1ModelSet),
+ ("u3", "i3,i1,i0,i2", "9876543210.0,123457.0,123456.0,21.0", "[t3,t4],[t2,t3],[t1,t2,t3],[t4]", test1Algoid, test1ModelSet))
+
+ val test1ItemRecScoresAllTop2 = List(
+ ("u0", "i3,i1", "9876543210.0,123457.0", "[t3,t4],[t2,t3]", test1Algoid, test1ModelSet),
+ ("u1", "i3,i1", "9876543210.0,123457.0", "[t3,t4],[t2,t3]", test1Algoid, test1ModelSet),
+ ("u2", "i3,i1", "9876543210.0,123457.0", "[t3,t4],[t2,t3]", test1Algoid, test1ModelSet),
+ ("u3", "i3,i1", "9876543210.0,123457.0", "[t3,t4],[t2,t3]", test1Algoid, test1ModelSet))
"latestrank.LatestRank with some itypes and numRecommendations larger than number of items" should {
- test(algoid, modelSet, itypesT1T2, 500, items, users, itemRecScoresT1T2)
+ test(test1Algoid, test1ModelSet, test1ItypesT1T2, 500, largeNumber, test1Items, test1Users, test1ItemRecScoresT1T2)
}
"latestrank.LatestRank with all itypes and numRecommendations larger than number of items" should {
- test(algoid, modelSet, itypesAll, 500, items, users, itemRecScoresAll)
+ test(test1Algoid, test1ModelSet, test1ItypesAll, 500, largeNumber, test1Items, test1Users, test1ItemRecScoresAll)
}
"latestrank.LatestRank with all itypes numRecommendations smaller than number of items" should {
- test(algoid, modelSet, itypesAll, 2, items, users, itemRecScoresAllTop2)
+ test(test1Algoid, test1ModelSet, test1ItypesAll, 2, largeNumber, test1Items, test1Users, test1ItemRecScoresAllTop2)
}
+ /* test 2: test starttime and endtime */
+ // starttime, endtime
+ // i0 A |---------|
+ // i1 B |---------|E
+ // i2 C|---------|
+ // i3 |---------|
+ // D F G
+
+ val tA = 123122
+ val tB = 123123
+ val tC = 123457
+ val tD = 123679
+ val tE = 543322
+ val tF = 543654
+ val tG = 543655
+
+ val test2Algoid = 12
+ val test2ModelSet = false
+
+ val test2ItypesAll = List("t1", "t2", "t3", "t4")
+ val test2Items = List(
+ ("i0", "t1,t2,t3", "19", "123123", "4", "543210"),
+ ("i1", "t2,t3", "19", "123456", "5", "543321"),
+ ("i2", "t4", "19", "123567", "6", "543432"),
+ ("i3", "t3,t4", "19", "123678", "7", "543654"))
+
+ val test2Users = List(("u0", "3"), ("u1", "3"), ("u2", "3"), ("u3", "3"))
+
+ val test2ItemRecScoresAll = List(
+ ("u0", "i3,i2,i1,i0", "123678.0,123567.0,123456.0,123123.0", "[t3,t4],[t4],[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("u1", "i3,i2,i1,i0", "123678.0,123567.0,123456.0,123123.0", "[t3,t4],[t4],[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("u2", "i3,i2,i1,i0", "123678.0,123567.0,123456.0,123123.0", "[t3,t4],[t4],[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("u3", "i3,i2,i1,i0", "123678.0,123567.0,123456.0,123123.0", "[t3,t4],[t4],[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet))
+
+ val test2ItemRecScoresEmpty = List()
+
+ val test2ItemRecScoresi0 = List(
+ ("u0", "i0", "123123.0", "[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("u1", "i0", "123123.0", "[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("u2", "i0", "123123.0", "[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("u3", "i0", "123123.0", "[t1,t2,t3]", test2Algoid, test2ModelSet))
+
+ val test2ItemRecScoresi0i1 = List(
+ ("u0", "i1,i0", "123456.0,123123.0", "[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("u1", "i1,i0", "123456.0,123123.0", "[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("u2", "i1,i0", "123456.0,123123.0", "[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("u3", "i1,i0", "123456.0,123123.0", "[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet))
+
+ val test2ItemRecScoresi2i3 = List(
+ ("u0", "i3,i2", "123678.0,123567.0", "[t3,t4],[t4]", test2Algoid, test2ModelSet),
+ ("u1", "i3,i2", "123678.0,123567.0", "[t3,t4],[t4]", test2Algoid, test2ModelSet),
+ ("u2", "i3,i2", "123678.0,123567.0", "[t3,t4],[t4]", test2Algoid, test2ModelSet),
+ ("u3", "i3,i2", "123678.0,123567.0", "[t3,t4],[t4]", test2Algoid, test2ModelSet))
+
+ "recommendationTime < all item starttime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tA, test2Items, test2Users, test2ItemRecScoresEmpty)
+ }
+
+ "recommendationTime == earliest starttime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tB, test2Items, test2Users, test2ItemRecScoresi0)
+ }
+
+ "recommendationTime > some items starttime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tC, test2Items, test2Users, test2ItemRecScoresi0i1)
+ }
+
+ "recommendationTime > all item starttime and < all item endtime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tD, test2Items, test2Users, test2ItemRecScoresAll)
+ }
+
+ "recommendationTime > some item endtime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tE, test2Items, test2Users, test2ItemRecScoresi2i3)
+ }
+
+ "recommendationTime == last item endtime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tF, test2Items, test2Users, test2ItemRecScoresEmpty)
+ }
+
+ "recommendationTime > last item endtime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tG, test2Items, test2Users, test2ItemRecScoresEmpty)
+ }
+
}
diff --git a/process/engines/itemrec/algorithms/hadoop/scalding/mahout/src/main/scala/io/prediction/algorithms/scalding/mahout/itemrec/DataPreparator.scala b/process/engines/itemrec/algorithms/hadoop/scalding/mahout/src/main/scala/io/prediction/algorithms/scalding/mahout/itemrec/DataPreparator.scala
index 80540f5..dc08307 100644
--- a/process/engines/itemrec/algorithms/hadoop/scalding/mahout/src/main/scala/io/prediction/algorithms/scalding/mahout/itemrec/DataPreparator.scala
+++ b/process/engines/itemrec/algorithms/hadoop/scalding/mahout/src/main/scala/io/prediction/algorithms/scalding/mahout/itemrec/DataPreparator.scala
@@ -35,6 +35,7 @@
*
* --itypes: <string separated by white space>. eg "--itypes type1 type2". If no --itypes specified, then ALL itypes will be used.
* --evalid: <int>. Offline Evaluation if evalid is specified
+ * --recommendationTime: <long> (eg. 9876543210). generate extra file (recommendItems.csv) which includes items with starttime <= recommendationTime and endtime > recommendationTime
* --debug: <String>. "test" - for testing purpose
*
* Example:
@@ -60,7 +61,7 @@
val preItypesArg = args.list("itypes")
val itypesArg: Option[List[String]] = if (preItypesArg.mkString(",").length == 0) None else Option(preItypesArg)
- // determin how to map actions to rating values
+ // determine how to map actions to rating values
def getActionParam(name: String): Option[Int] = {
val actionParam: Option[Int] = args(name) match {
case "ignore" => None
@@ -88,6 +89,8 @@
val debugArg = args.list("debug")
val DEBUG_TEST = debugArg.contains("test") // test mode
+ val recommendationTimeArg = args.optional("recommendationTime").map(_.toLong)
+
// NOTE: if OFFLINE_EVAL, read from training set, and use evalid as appid when read Items and U2iActions
val trainingAppid = if (OFFLINE_EVAL) evalidArg.get else appidArg
@@ -100,7 +103,7 @@
*/
val items = Items(appId = trainingAppid, itypes = itypesArg,
- dbType = dbTypeArg, dbName = dbNameArg, dbHost = dbHostArg, dbPort = dbPortArg).readData('iidx, 'itypes)
+ dbType = dbTypeArg, dbName = dbNameArg, dbHost = dbHostArg, dbPort = dbPortArg).readStartEndtime('iidx, 'itypes, 'starttime, 'endtime)
val users = Users(appId = trainingAppid,
dbType = dbTypeArg, dbName = dbNameArg, dbHost = dbHostArg, dbPort = dbPortArg).readData('uid)
@@ -118,10 +121,12 @@
users.write(userIdSink)
- items.mapTo(('iidx, 'itypes) -> ('iidx, 'itypes)) { fields: (String, List[String]) =>
- val (iidx, itypes) = fields
+ items.mapTo(('iidx, 'itypes, 'starttime, 'endtime) -> ('iidx, 'itypes, 'starttime, 'endtime)) { fields: (String, List[String], Long, Option[Long]) =>
+ val (iidx, itypes, starttime, endtime) = fields
- (iidx, itypes.mkString(",")) // NOTE: convert List[String] into comma-separated String
+ // NOTE: convert List[String] into comma-separated String
+ // NOTE: endtime is optional
+ (iidx, itypes.mkString(","), starttime, endtime.map(_.toString).getOrElse("PIO_NONE"))
}.write(selectedItemSink)
}
@@ -147,21 +152,21 @@
// use byte offset as index for Mahout algo
val itemsIndex = TextLine(DataFile(hdfsRootArg, appidArg, engineidArg, algoidArg, evalidArg, "selectedItems.tsv")).read
- .mapTo(('offset, 'line) -> ('iindex, 'iidx, 'itypes)) { fields: (String, String) =>
+ .mapTo(('offset, 'line) -> ('iindex, 'iidx, 'itypes, 'starttime, 'endtime)) { fields: (String, String) =>
val (offset, line) = fields
val lineArray = line.split("\t")
- val (iidx, itypes) = try {
- (lineArray(0), lineArray(1))
+ val (iidx, itypes, starttime, endtime) = try {
+ (lineArray(0), lineArray(1), lineArray(2), lineArray(3))
} catch {
case e: Exception => {
- assert(false, "Failed to extract iidx and itypes from the line: " + line + ". Exception: " + e)
- (0, "dummy")
+ assert(false, "Failed to extract iidx, itypes, starttime and endtime from the line: " + line + ". Exception: " + e)
+ (0, "dummy", "dummy", "dummy")
}
}
- (offset, iidx, itypes)
+ (offset, iidx, itypes, starttime, endtime)
}
val usersIndex = TextLine(DataFile(hdfsRootArg, appidArg, engineidArg, algoidArg, evalidArg, "userIds.tsv")).read
@@ -177,6 +182,9 @@
val ratingsSink = Csv(DataFile(hdfsRootArg, appidArg, engineidArg, algoidArg, evalidArg, "ratings.csv"))
+ // only recommend these items
+ val recommendItemsSink = Csv(DataFile(hdfsRootArg, appidArg, engineidArg, algoidArg, evalidArg, "recommendItems.csv"))
+
/**
* computation
*/
@@ -185,9 +193,45 @@
usersIndex.write(usersIndexSink)
+ // Note: for u2i, use all items of the specified itypes.
+ // but recommendItems only include items to be recommended:
+ // - with valid starttime and endtime
+ recommendationTimeArg.foreach { recTime =>
+ itemsIndex
+ .filter('starttime, 'endtime) { fields: (Long, String) =>
+ val (starttimeI, endtime) = fields
+
+ val endtimeI: Option[Long] = endtime match {
+ case "PIO_NONE" => None
+ case x: String => {
+ try {
+ Some(x.toLong)
+ } catch {
+ case e: Exception => {
+ assert(false, s"Failed to convert ${x} to Long. Exception: " + e)
+ Some(0)
+ }
+ }
+ }
+ }
+
+ val keepThis: Boolean = (starttimeI, endtimeI) match {
+ case (start, None) => (recTime >= start)
+ case (start, Some(end)) => ((recTime >= start) && (recTime < end))
+ case _ => {
+ assert(false, s"Unexpected item starttime ${starttimeI} and endtime ${endtimeI}")
+ false
+ }
+ }
+ keepThis
+ }
+ .project('iindex)
+ .write(recommendItemsSink)
+ }
+
// filter and pre-process actions
u2i.joinWithSmaller('iid -> 'iidx, itemsIndex) // only select actions of these items
- .filter('action, 'v) { fields: (String, String) =>
+ .filter('action, 'v) { fields: (String, Option[String]) =>
val (action, v) = fields
val keepThis: Boolean = action match {
@@ -203,12 +247,19 @@
}
keepThis
}
- .map(('action, 'v, 't) -> ('rating, 'tLong)) { fields: (String, String, String) =>
+ .map(('action, 'v, 't) -> ('rating, 'tLong)) { fields: (String, Option[String], String) =>
val (action, v, t) = fields
// convert actions into rating value based on "action" and "v" fields
val rating: Int = action match {
- case ACTION_RATE => v.toInt
+ case ACTION_RATE => try {
+ v.get.toInt
+ } catch {
+ case e: Exception => {
+ assert(false, s"Failed to convert v field ${v} to integer for ${action} action. Exception:" + e)
+ 1
+ }
+ }
case ACTION_LIKE => likeParamArg.getOrElse {
assert(false, "Action type " + action + " should have been filtered out!")
1
diff --git a/process/engines/itemrec/algorithms/hadoop/scalding/mahout/src/main/scala/io/prediction/algorithms/scalding/mahout/itemrec/ModelConstructor.scala b/process/engines/itemrec/algorithms/hadoop/scalding/mahout/src/main/scala/io/prediction/algorithms/scalding/mahout/itemrec/ModelConstructor.scala
index 65cc6c4..15677b3 100644
--- a/process/engines/itemrec/algorithms/hadoop/scalding/mahout/src/main/scala/io/prediction/algorithms/scalding/mahout/itemrec/ModelConstructor.scala
+++ b/process/engines/itemrec/algorithms/hadoop/scalding/mahout/src/main/scala/io/prediction/algorithms/scalding/mahout/itemrec/ModelConstructor.scala
@@ -26,6 +26,7 @@
*
* --unseenOnly: <boolean> (true/false). only recommend unseen items if this is true.
* --numRecommendations: <int>. number of recommendations to be generated
+ * --recommendationTime: <long> (eg. 9876543210). recommend items with starttime <= recommendationTime and endtime > recommendationTime
*
* Optionsl args:
* --dbHost: <string> (eg. "127.0.0.1")
@@ -34,6 +35,9 @@
* --evalid: <int>. Offline Evaluation if evalid is specified
* --debug: <String>. "test" - for testing purpose
*
+ * --booleanData: <boolean>. Mahout item rec algo flag for implicit action data
+ * --implicitFeedback: <boolean>. Mahout item rec algo flag for implicit action data
+ *
* Example:
*
*/
@@ -62,6 +66,12 @@
val unseenOnlyArg = args("unseenOnly").toBoolean
val numRecommendationsArg = args("numRecommendations").toInt
+ val recommendationTimeArg = args("recommendationTime").toLong
+
+ val booleanDataArg = args.optional("booleanData").map(x => x.toBoolean).getOrElse(false)
+ val implicitFeedbackArg = args.optional("implicitFeedback").map(x => x.toBoolean).getOrElse(false)
+ // implicit preference flag.
+ val IMPLICIT_PREFERENCE = booleanDataArg || implicitFeedbackArg
/**
* source
@@ -72,10 +82,24 @@
val ratingSource = Csv(DataFile(hdfsRootArg, appidArg, engineidArg, algoidArg, evalidArg, "ratings.csv"), ",", ('uindexR, 'iindexR, 'ratingR))
val itemsIndex = Tsv(DataFile(hdfsRootArg, appidArg, engineidArg, algoidArg, evalidArg, "itemsIndex.tsv")).read
- .mapTo((0, 1, 2) -> ('iindexI, 'iidI, 'itypesI)) { fields: (String, String, String) =>
- val (iindex, iid, itypes) = fields // itypes are comma-separated String
+ .mapTo((0, 1, 2, 3, 4) -> ('iindexI, 'iidI, 'itypesI, 'starttimeI, 'endtimeI)) { fields: (String, String, String, Long, String) =>
+ val (iindex, iid, itypes, starttime, endtime) = fields // itypes are comma-separated String
- (iindex, iid, itypes.split(",").toList)
+ val endtimeOpt: Option[Long] = endtime match {
+ case "PIO_NONE" => None
+ case x: String => {
+ try {
+ Some(x.toLong)
+ } catch {
+ case e: Exception => {
+ assert(false, s"Failed to convert ${x} to Long. Exception: " + e)
+ Some(0)
+ }
+ }
+ }
+ }
+
+ (iindex, iid, itypes.split(",").toList, starttime, endtimeOpt)
}
val usersIndex = Tsv(DataFile(hdfsRootArg, appidArg, engineidArg, algoidArg, evalidArg, "usersIndex.tsv")).read
@@ -94,7 +118,9 @@
* computation
*/
- val seenRatings = ratingSource.read
+ val seenRatings = ratingSource.read.mapTo(('uindexR, 'iindexR, 'ratingR) -> ('uindexR, 'iindexR, 'ratingR)) {
+ fields: (String, String, Double) => fields // convert score from String to Double
+ }
// convert to (uindex, iindex, rating) format
// and filter seen items from predicted
@@ -103,7 +129,10 @@
.filter('ratingR) { r: Double => (r == 0) } // if ratingR == 0, means unseen rating
.project('uindex, 'iindex, 'rating)
- val combinedRating = if (unseenOnlyArg) predictedRating else {
+ // NOTE: only support unseenOnly if IMPLICIT_PREFERENCE = true because
+ // can't simply merge the seen preference value with predicted preference value due to different meaning in value
+ // (depending on which distance function is used).
+ val combinedRating = if (unseenOnlyArg || IMPLICIT_PREFERENCE) predictedRating else {
// rename for concatenation
val seenRatings2 = seenRatings.rename(('uindexR, 'iindexR, 'ratingR) -> ('uindex, 'iindex, 'rating))
@@ -112,8 +141,21 @@
}
combinedRating
- .groupBy('uindex) { _.sortBy('rating).reverse.take(numRecommendationsArg) }
.joinWithSmaller('iindex -> 'iindexI, itemsIndex)
+ .filter('starttimeI, 'endtimeI) { fields: (Long, Option[Long]) =>
+ val (starttimeI, endtimeI) = fields
+
+ val keepThis: Boolean = (starttimeI, endtimeI) match {
+ case (start, None) => (recommendationTimeArg >= start)
+ case (start, Some(end)) => ((recommendationTimeArg >= start) && (recommendationTimeArg < end))
+ case _ => {
+ assert(false, s"Unexpected item starttime ${starttimeI} and endtime ${endtimeI}")
+ false
+ }
+ }
+ keepThis
+ }
+ .groupBy('uindex) { _.sortBy('rating).reverse.take(numRecommendationsArg) }
.joinWithSmaller('uindex -> 'uindexU, usersIndex)
.project('uidU, 'iidI, 'rating, 'itypesI)
.groupBy('uidU) { _.sortBy('rating).reverse.toList[(String, Double, List[String])](('iidI, 'rating, 'itypesI) -> 'iidsList) }
@@ -126,11 +168,21 @@
[0:2.0]
[16:3.0]
*/
- def parsePredictedData(data: String): List[(String, String)] = {
+ def parsePredictedData(data: String): List[(String, Double)] = {
val dataLen = data.length
data.take(dataLen - 1).tail.split(",").toList.map { ratingData =>
val ratingDataArray = ratingData.split(":")
- (ratingDataArray(0), ratingDataArray(1))
+ val item = ratingDataArray(0)
+ val rating: Double = try {
+ ratingDataArray(1).toDouble
+ } catch {
+ case e: Exception =>
+ {
+ assert(false, s"Cannot convert rating value of item ${item} to double: " + ratingDataArray + ". Exception: " + e)
+ }
+ 0.0
+ }
+ (item, rating)
}
}
}
diff --git a/process/engines/itemrec/algorithms/hadoop/scalding/mahout/src/test/scala/io/prediction/algorithms/scalding/mahout/itemrec/DataPreparatorTest.scala b/process/engines/itemrec/algorithms/hadoop/scalding/mahout/src/test/scala/io/prediction/algorithms/scalding/mahout/itemrec/DataPreparatorTest.scala
index 92bc572..bb7adec 100644
--- a/process/engines/itemrec/algorithms/hadoop/scalding/mahout/src/test/scala/io/prediction/algorithms/scalding/mahout/itemrec/DataPreparatorTest.scala
+++ b/process/engines/itemrec/algorithms/hadoop/scalding/mahout/src/test/scala/io/prediction/algorithms/scalding/mahout/itemrec/DataPreparatorTest.scala
@@ -16,106 +16,206 @@
//val ViewDetails = "viewDetails"
val Conversion = "conversion"
- def test(itypes: List[String], params: Map[String, String],
- items: List[(String, String)],
+ val appid = 2
+
+ def test(itypes: List[String], recommendationTime: Long, params: Map[String, String],
+ items: List[(String, String, String, String, String, String)], // id, itypes, appid, starttime, ct, endtime
users: List[Tuple1[String]],
u2iActions: List[(String, String, String, String, String)],
ratings: List[(String, String, String)],
- selectedItems: List[(String, String)],
+ selectedItems: List[(String, String, String, String)], // id, itypes, starttime, endtime
itemsIndexer: Map[String, String],
- usersIndexer: Map[String, String]) = {
+ usersIndexer: Map[String, String],
+ recommendItems: Option[List[String]] = None) = {
val userIds = users map (x => x._1)
- val selectedItemsTextLine = selectedItems map { x => (itemsIndexer(x._1), x._1 + "\t" + x._2) }
+ val selectedItemsTextLine = selectedItems map { x => (itemsIndexer(x._1), x.productIterator.mkString("\t")) }
val usersTextLine = users map { x => (usersIndexer(x._1), x._1) }
- val itemsIndex = selectedItems map { x => (itemsIndexer(x._1), x._1, x._2) }
+ val itemsIndex = selectedItems map { x => (itemsIndexer(x._1), x._1, x._2, x._3, x._4) }
val usersIndex = users map { x => (usersIndexer(x._1), x._1) }
val ratingsIndexed = ratings map { x => (usersIndexer(x._1), itemsIndexer(x._2), x._3) }
+ val recommendItemsIndexed = recommendItems.map { x => x.map(y => itemsIndexer(y)) }.getOrElse(List())
+
val dbType = "file"
val dbName = "testpath/"
val dbHost = None
val dbPort = None
val hdfsRoot = "testroot/"
- val appid = 2
val engineid = 4
val algoid = 5
val evalid = None
- JobTest("io.prediction.algorithms.scalding.mahout.itemrec.DataCopy")
- .arg("dbType", dbType)
- .arg("dbName", dbName)
- .arg("hdfsRoot", hdfsRoot)
- .arg("appid", appid.toString)
- .arg("engineid", engineid.toString)
- .arg("algoid", algoid.toString)
- .arg("itypes", itypes)
- .arg("viewParam", params("viewParam"))
- .arg("likeParam", params("likeParam"))
- .arg("dislikeParam", params("dislikeParam"))
- .arg("conversionParam", params("conversionParam"))
- .arg("conflictParam", params("conflictParam"))
- .source(Items(appId = appid, itypes = Some(itypes), dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, items)
- .source(Users(appId = appid, dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, users)
- .sink[(String)](Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "userIds.tsv"))) { outputBuffer =>
- "correctly write userIds.tsv" in {
- outputBuffer.toList must containTheSameElementsAs(userIds)
+ if (recommendItems == None) {
+ JobTest("io.prediction.algorithms.scalding.mahout.itemrec.DataCopy")
+ .arg("dbType", dbType)
+ .arg("dbName", dbName)
+ .arg("hdfsRoot", hdfsRoot)
+ .arg("appid", appid.toString)
+ .arg("engineid", engineid.toString)
+ .arg("algoid", algoid.toString)
+ .arg("itypes", itypes)
+ .arg("viewParam", params("viewParam"))
+ .arg("likeParam", params("likeParam"))
+ .arg("dislikeParam", params("dislikeParam"))
+ .arg("conversionParam", params("conversionParam"))
+ .arg("conflictParam", params("conflictParam"))
+ .source(Items(appId = appid, itypes = Some(itypes), dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, items)
+ .source(Users(appId = appid, dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, users)
+ .sink[(String)](Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "userIds.tsv"))) { outputBuffer =>
+ "correctly write userIds.tsv" in {
+ outputBuffer.toList must containTheSameElementsAs(userIds)
+ }
}
- }
- .sink[(String, String)](Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "selectedItems.tsv"))) { outputBuffer =>
- "correctly write selectedItems.tsv" in {
- outputBuffer.toList must containTheSameElementsAs(selectedItems)
+ .sink[(String, String, String, String)](Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "selectedItems.tsv"))) { outputBuffer =>
+ "correctly write selectedItems.tsv" in {
+ outputBuffer.toList must containTheSameElementsAs(selectedItems)
+ }
}
- }
- .run
- .finish
+ .run
+ .finish
+ } else {
+ JobTest("io.prediction.algorithms.scalding.mahout.itemrec.DataCopy")
+ .arg("dbType", dbType)
+ .arg("dbName", dbName)
+ .arg("hdfsRoot", hdfsRoot)
+ .arg("appid", appid.toString)
+ .arg("engineid", engineid.toString)
+ .arg("algoid", algoid.toString)
+ .arg("itypes", itypes)
+ .arg("viewParam", params("viewParam"))
+ .arg("likeParam", params("likeParam"))
+ .arg("dislikeParam", params("dislikeParam"))
+ .arg("conversionParam", params("conversionParam"))
+ .arg("conflictParam", params("conflictParam"))
+ .arg("recommendationTime", recommendationTime.toString)
+ .source(Items(appId = appid, itypes = Some(itypes), dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, items)
+ .source(Users(appId = appid, dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, users)
+ .sink[(String)](Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "userIds.tsv"))) { outputBuffer =>
+ "correctly write userIds.tsv" in {
+ outputBuffer.toList must containTheSameElementsAs(userIds)
+ }
+ }
+ .sink[(String, String, String, String)](Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "selectedItems.tsv"))) { outputBuffer =>
+ "correctly write selectedItems.tsv" in {
+ outputBuffer.toList must containTheSameElementsAs(selectedItems)
+ }
+ }
+ .run
+ .finish
+ }
- JobTest("io.prediction.algorithms.scalding.mahout.itemrec.DataPreparator")
- .arg("dbType", dbType)
- .arg("dbName", dbName)
- .arg("hdfsRoot", hdfsRoot)
- .arg("appid", appid.toString)
- .arg("engineid", engineid.toString)
- .arg("algoid", algoid.toString)
- .arg("itypes", itypes)
- .arg("viewParam", params("viewParam"))
- .arg("likeParam", params("likeParam"))
- .arg("dislikeParam", params("dislikeParam"))
- .arg("conversionParam", params("conversionParam"))
- .arg("conflictParam", params("conflictParam"))
- .source(U2iActions(appId = appid, dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, u2iActions)
- .source(TextLine(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "selectedItems.tsv")), selectedItemsTextLine)
- .source(TextLine(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "userIds.tsv")), usersTextLine)
- .sink[(String, String, String)](Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "itemsIndex.tsv"))) { outputBuffer =>
- // index, iid, itypes
- "correctly write itemsIndex.tsv" in {
- outputBuffer.toList must containTheSameElementsAs(itemsIndex)
+ if (recommendItems == None) {
+ JobTest("io.prediction.algorithms.scalding.mahout.itemrec.DataPreparator")
+ .arg("dbType", dbType)
+ .arg("dbName", dbName)
+ .arg("hdfsRoot", hdfsRoot)
+ .arg("appid", appid.toString)
+ .arg("engineid", engineid.toString)
+ .arg("algoid", algoid.toString)
+ .arg("itypes", itypes)
+ .arg("viewParam", params("viewParam"))
+ .arg("likeParam", params("likeParam"))
+ .arg("dislikeParam", params("dislikeParam"))
+ .arg("conversionParam", params("conversionParam"))
+ .arg("conflictParam", params("conflictParam"))
+ .source(U2iActions(appId = appid, dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, u2iActions)
+ .source(TextLine(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "selectedItems.tsv")), selectedItemsTextLine)
+ .source(TextLine(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "userIds.tsv")), usersTextLine)
+ .sink[(String, String, String, String, String)](Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "itemsIndex.tsv"))) { outputBuffer =>
+ // index, iid, itypes
+ "correctly write itemsIndex.tsv" in {
+ outputBuffer.toList must containTheSameElementsAs(itemsIndex)
+ }
}
- }
- .sink[(String, String)](Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "usersIndex.tsv"))) { outputBuffer =>
- // index, uid
- "correctly write usersIndex.tsv" in {
- outputBuffer.toList must containTheSameElementsAs(usersIndex)
+ .sink[(String, String)](Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "usersIndex.tsv"))) { outputBuffer =>
+ // index, uid
+ "correctly write usersIndex.tsv" in {
+ outputBuffer.toList must containTheSameElementsAs(usersIndex)
+ }
}
- }
- .sink[(String, String, String)](Csv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "ratings.csv"))) { outputBuffer =>
- "correctly process and write data to ratings.csv" in {
- outputBuffer.toList must containTheSameElementsAs(ratingsIndexed)
+ .sink[(String, String, String)](Csv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "ratings.csv"))) { outputBuffer =>
+ "correctly process and write data to ratings.csv" in {
+ outputBuffer.toList must containTheSameElementsAs(ratingsIndexed)
+ }
}
- }
- .run
- .finish
-
+ .run
+ .finish
+ } else {
+ JobTest("io.prediction.algorithms.scalding.mahout.itemrec.DataPreparator")
+ .arg("dbType", dbType)
+ .arg("dbName", dbName)
+ .arg("hdfsRoot", hdfsRoot)
+ .arg("appid", appid.toString)
+ .arg("engineid", engineid.toString)
+ .arg("algoid", algoid.toString)
+ .arg("itypes", itypes)
+ .arg("viewParam", params("viewParam"))
+ .arg("likeParam", params("likeParam"))
+ .arg("dislikeParam", params("dislikeParam"))
+ .arg("conversionParam", params("conversionParam"))
+ .arg("conflictParam", params("conflictParam"))
+ .arg("recommendationTime", recommendationTime.toString)
+ .source(U2iActions(appId = appid, dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort).getSource, u2iActions)
+ .source(TextLine(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "selectedItems.tsv")), selectedItemsTextLine)
+ .source(TextLine(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "userIds.tsv")), usersTextLine)
+ .sink[(String, String, String, String, String)](Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "itemsIndex.tsv"))) { outputBuffer =>
+ // index, iid, itypes
+ "correctly write itemsIndex.tsv" in {
+ outputBuffer.toList must containTheSameElementsAs(itemsIndex)
+ }
+ }
+ .sink[(String, String)](Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "usersIndex.tsv"))) { outputBuffer =>
+ // index, uid
+ "correctly write usersIndex.tsv" in {
+ outputBuffer.toList must containTheSameElementsAs(usersIndex)
+ }
+ }
+ .sink[(String, String, String)](Csv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "ratings.csv"))) { outputBuffer =>
+ "correctly process and write data to ratings.csv" in {
+ outputBuffer.toList must containTheSameElementsAs(ratingsIndexed)
+ }
+ }
+ .sink[(String)](Csv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "recommendItems.csv"))) { outputBuffer =>
+ "correctly process and write data to recommendItems.csv" in {
+ outputBuffer.toList must containTheSameElementsAs(recommendItemsIndexed)
+ }
+ }
+ .run
+ .finish
+ }
}
+ val noEndtime = "PIO_NONE"
/**
* Test 1. basic. Rate actions only without conflicts
*/
val test1AllItypes = List("t1", "t2", "t3", "t4")
- val test1Items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test1ItemsMap = Map(
+ // id, itypes, appid, starttime, ct, endtime
+ "i0" -> ("i0", "t1,t2,t3", appid.toString, "12345", "12346", noEndtime),
+ "i1" -> ("i1", "t2,t3", appid.toString, "12347", "12348", noEndtime),
+ "i2" -> ("i2", "t4", appid.toString, "12349", "12350", noEndtime),
+ "i3" -> ("i3", "t3,t4", appid.toString, "12351", "12352", noEndtime))
+
+ val test1Items = List(
+ test1ItemsMap("i0"),
+ test1ItemsMap("i1"),
+ test1ItemsMap("i2"),
+ test1ItemsMap("i3"))
+
+ val test1RecommendItems = Some(List("i0", "i1", "i2", "i3"))
+
+ def genSelectedItems(items: List[(String, String, String, String, String, String)]) = {
+ items map { x =>
+ val (id, itypes, appid, starttime, ct, endtime) = x
+ (id, itypes, starttime, endtime)
+ }
+ }
+
val test1ItemsIndexer = Map("i0" -> "0", "i1" -> "4", "i2" -> "7", "i3" -> "8") // map iid to index
val test1Users = List(Tuple1("u0"), Tuple1("u1"), Tuple1("u2"), Tuple1("u3"))
@@ -141,18 +241,36 @@
"conflictParam" -> "latest")
"DataPreparator with only rate actions, all itypes, no conflict" should {
- test(test1AllItypes, test1Params, test1Items, test1Users, test1U2i, test1Ratings, test1Items, test1ItemsIndexer, test1UsersIndexer)
+ test(test1AllItypes, 20000, test1Params, test1Items, test1Users, test1U2i, test1Ratings, genSelectedItems(test1Items), test1ItemsIndexer, test1UsersIndexer, test1RecommendItems)
}
"DataPreparator with only rate actions, no itypes specified, no conflict" should {
- test(List(), test1Params, test1Items, test1Users, test1U2i, test1Ratings, test1Items, test1ItemsIndexer, test1UsersIndexer)
+ test(List(), 20000, test1Params, test1Items, test1Users, test1U2i, test1Ratings, genSelectedItems(test1Items), test1ItemsIndexer, test1UsersIndexer, test1RecommendItems)
+ }
+
+ "DataPreparator with only rate actions, no itypes specified, no conflict, without recommendItems arg" should {
+ test(List(), 20000, test1Params, test1Items, test1Users, test1U2i, test1Ratings, genSelectedItems(test1Items), test1ItemsIndexer, test1UsersIndexer, None)
}
/**
* Test 2. rate actions only with conflicts
*/
val test2AllItypes = List("t1", "t2", "t3", "t4")
- val test2Items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test2ItemsMap = Map(
+ // id, itypes, appid, starttime, ct, endtime
+ "i0" -> ("i0", "t1,t2,t3", appid.toString, "12345", "12346", noEndtime),
+ "i1" -> ("i1", "t2,t3", appid.toString, "12347", "12348", noEndtime),
+ "i2" -> ("i2", "t4", appid.toString, "12349", "12350", noEndtime),
+ "i3" -> ("i3", "t3,t4", appid.toString, "12351", "12352", noEndtime))
+
+ val test2Items = List(
+ test2ItemsMap("i0"),
+ test2ItemsMap("i1"),
+ test2ItemsMap("i2"),
+ test2ItemsMap("i3"))
+
+ val test2RecommendItems = Some(List("i0", "i1", "i2", "i3"))
+
val test2ItemsIndexer = Map("i0" -> "0", "i1" -> "4", "i2" -> "7", "i3" -> "8") // map iid to index
val test2Users = List(Tuple1("u0"), Tuple1("u1"), Tuple1("u2"), Tuple1("u3"))
@@ -204,7 +322,13 @@
("u1", "i1", "3"))
val test2Itypes_t1t4 = List("t1", "t4")
- val test2Items_t1t4 = List(("i0", "t1,t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test2Items_t1t4 = List(
+ test2ItemsMap("i0"),
+ test2ItemsMap("i2"),
+ test2ItemsMap("i3"))
+
+ val test2RecommendItems_t1t4 = Some(List("i0", "i2", "i3"))
+
val test2RatingsHighest_t1t4 = List(
("u0", "i0", "4"),
("u0", "i2", "5"),
@@ -217,26 +341,39 @@
val test2ParamsLowest = test2Params + ("conflictParam" -> "lowest")
"DataPreparator with only rate actions, all itypes, conflict=latest" should {
- test(test2AllItypes, test2Params, test2Items, test2Users, test2U2i, test2RatingsLatest, test2Items, test2ItemsIndexer, test2UsersIndexer)
+ test(test2AllItypes, 20000, test2Params, test2Items, test2Users, test2U2i, test2RatingsLatest, genSelectedItems(test2Items), test2ItemsIndexer, test2UsersIndexer, test2RecommendItems)
}
"DataPreparator with only rate actions, all itypes, conflict=highest" should {
- test(test2AllItypes, test2ParamsHighest, test2Items, test2Users, test2U2i, test2RatingsHighest, test2Items, test2ItemsIndexer, test2UsersIndexer)
+ test(test2AllItypes, 20000, test2ParamsHighest, test2Items, test2Users, test2U2i, test2RatingsHighest, genSelectedItems(test2Items), test2ItemsIndexer, test2UsersIndexer, test2RecommendItems)
}
"DataPreparator with only rate actions, all itypes, conflict=lowest" should {
- test(test2AllItypes, test2ParamsLowest, test2Items, test2Users, test2U2i, test2RatingsLowest, test2Items, test2ItemsIndexer, test2UsersIndexer)
+ test(test2AllItypes, 20000, test2ParamsLowest, test2Items, test2Users, test2U2i, test2RatingsLowest, genSelectedItems(test2Items), test2ItemsIndexer, test2UsersIndexer, test2RecommendItems)
}
"DataPreparator with only rate actions, some itypes, conflict=highest" should {
- test(test2Itypes_t1t4, test2ParamsHighest, test2Items, test2Users, test2U2i, test2RatingsHighest_t1t4, test2Items_t1t4, test2ItemsIndexer, test2UsersIndexer)
+ test(test2Itypes_t1t4, 20000, test2ParamsHighest, test2Items, test2Users, test2U2i, test2RatingsHighest_t1t4, genSelectedItems(test2Items_t1t4), test2ItemsIndexer, test2UsersIndexer, test2RecommendItems_t1t4)
}
/**
- * Test 3. Different Actions without conflicts
+ * Test 3. Different Actions without conflicts and endtime
*/
val test3AllItypes = List("t1", "t2", "t3", "t4")
- val test3Items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test3ItemsMap = Map(
+ // id, itypes, appid, starttime, ct, endtime
+ "i0" -> ("i0", "t1,t2,t3", appid.toString, "12345", "12346", "56789"),
+ "i1" -> ("i1", "t2,t3", appid.toString, "12347", "12348", noEndtime),
+ "i2" -> ("i2", "t4", appid.toString, "12349", "12350", "56790"),
+ "i3" -> ("i3", "t3,t4", appid.toString, "12351", "12352", noEndtime))
+
+ val test3Items = List(
+ test3ItemsMap("i0"),
+ test3ItemsMap("i1"),
+ test3ItemsMap("i2"),
+ test3ItemsMap("i3"))
+
+ val test3RecommendItems = Some(List("i0", "i1", "i2", "i3"))
val test3ItemsIndexer = Map("i0" -> "0", "i1" -> "4", "i2" -> "7", "i3" -> "8") // map iid to index
@@ -245,11 +382,11 @@
val test3U2i = List(
(Rate, "u0", "i0", "123450", "4"),
- (Like, "u0", "i1", "123457", "3"),
- (Dislike, "u0", "i2", "123458", "3"),
- (View, "u0", "i3", "123459", "0"), // NOTE: assume v field won't be missing
+ (Like, "u0", "i1", "123457", "PIO_NONE"),
+ (Dislike, "u0", "i2", "123458", "PIO_NONE"),
+ (View, "u0", "i3", "123459", "PIO_NONE"), // NOTE: assume v field won't be missing
(Rate, "u1", "i0", "123457", "2"),
- (Conversion, "u1", "i1", "123458", "0"))
+ (Conversion, "u1", "i1", "123458", "PIO_NONE"))
val test3Ratings = List(
("u0", "i0", "4"),
@@ -263,17 +400,30 @@
"conflictParam" -> "latest")
"DataPreparator with only all actions, all itypes, no conflict" should {
- test(test3AllItypes, test3Params, test3Items, test3Users, test3U2i, test3Ratings, test3Items, test3ItemsIndexer, test3UsersIndexer)
+ test(test3AllItypes, 20000, test3Params, test3Items, test3Users, test3U2i, test3Ratings, genSelectedItems(test3Items), test3ItemsIndexer, test3UsersIndexer, test3RecommendItems)
}
/**
- * test 4. Different Actions with conflicts
+ * test 4. Different Actions with conflicts and endtime
*/
val test4Params: Map[String, String] = Map("viewParam" -> "2", "likeParam" -> "5", "dislikeParam" -> "1", "conversionParam" -> "4",
"conflictParam" -> "latest")
val test4AllItypes = List("t1", "t2", "t3", "t4")
- val test4Items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test4ItemsMap = Map(
+ // id, itypes, appid, starttime, ct, endtime
+ "i0" -> ("i0", "t1,t2,t3", appid.toString, "12345", "12346", "56789"),
+ "i1" -> ("i1", "t2,t3", appid.toString, "12347", "12348", noEndtime),
+ "i2" -> ("i2", "t4", appid.toString, "12349", "12350", "56790"),
+ "i3" -> ("i3", "t3,t4", appid.toString, "12351", "12352", noEndtime))
+
+ val test4Items = List(
+ test4ItemsMap("i0"),
+ test4ItemsMap("i1"),
+ test4ItemsMap("i2"),
+ test4ItemsMap("i3"))
+
+ val test4RecommendItems = Some(List("i0", "i1", "i2", "i3"))
val test4ItemsIndexer = Map("i0" -> "0", "i1" -> "4", "i2" -> "7", "i3" -> "8") // map iid to index
@@ -282,24 +432,24 @@
val test4U2i = List(
(Rate, "u0", "i0", "123448", "3"),
- (View, "u0", "i0", "123449", "4"), // lowest (2)
- (Like, "u0", "i0", "123451", "0"), // latest, highest (5)
- (Conversion, "u0", "i0", "123450", "1"),
+ (View, "u0", "i0", "123449", "PIO_NONE"), // lowest (2)
+ (Like, "u0", "i0", "123451", "PIO_NONE"), // latest, highest (5)
+ (Conversion, "u0", "i0", "123450", "PIO_NONE"),
(Rate, "u0", "i1", "123456", "1"), // lowest
(Rate, "u0", "i1", "123457", "4"), // highest
- (View, "u0", "i1", "123458", "3"), // latest (2)
+ (View, "u0", "i1", "123458", "PIO_NONE"), // latest (2)
- (Conversion, "u0", "i2", "123461", "2"), // latest, highest (4)
+ (Conversion, "u0", "i2", "123461", "PIO_NONE"), // latest, highest (4)
(Rate, "u0", "i2", "123459", "3"),
- (View, "u0", "i2", "123460", "5"), // lowest
+ (View, "u0", "i2", "123460", "PIO_NONE"), // lowest
(Rate, "u0", "i3", "123459", "2"),
- (View, "u1", "i0", "123457", "5"), // (2)
+ (View, "u1", "i0", "123457", "PIO_NONE"), // (2)
(Rate, "u1", "i1", "123458", "5"), // highest
- (Conversion, "u1", "i1", "123459", "4"), // (4)
- (Dislike, "u1", "i1", "123460", "1")) // latest, lowest (1)
+ (Conversion, "u1", "i1", "123459", "PIO_NONE"), // (4)
+ (Dislike, "u1", "i1", "123460", "PIO_NONE")) // latest, lowest (1)
val test4RatingsLatest = List(
("u0", "i0", "5"),
@@ -310,7 +460,7 @@
("u1", "i1", "1"))
"DataPreparator with all actions, all itypes, and conflicts=latest" should {
- test(test4AllItypes, test4Params, test4Items, test4Users, test4U2i, test4RatingsLatest, test4Items, test4ItemsIndexer, test4UsersIndexer)
+ test(test4AllItypes, 20000, test4Params, test4Items, test4Users, test4U2i, test4RatingsLatest, genSelectedItems(test4Items), test4ItemsIndexer, test4UsersIndexer, test4RecommendItems)
}
val test4ParamsIgnoreView = test4Params + ("viewParam" -> "ignore")
@@ -323,7 +473,7 @@
("u1", "i1", "1"))
"DataPreparator with all actions, all itypes, ignore View actions and conflicts=latest" should {
- test(test4AllItypes, test4ParamsIgnoreView, test4Items, test4Users, test4U2i, test4RatingsIgnoreViewLatest, test4Items, test4ItemsIndexer, test4UsersIndexer)
+ test(test4AllItypes, 20000, test4ParamsIgnoreView, test4Items, test4Users, test4U2i, test4RatingsIgnoreViewLatest, genSelectedItems(test4Items), test4ItemsIndexer, test4UsersIndexer, test4RecommendItems)
}
// note: currently rate action can't be ignored
@@ -338,7 +488,7 @@
("u1", "i1", "5"))
"DataPreparator with all actions, all itypes, ignore all actions except View (and Rate) and conflicts=latest" should {
- test(test4AllItypes, test4ParamsIgnoreAllExceptView, test4Items, test4Users, test4U2i, test4RatingsIgnoreAllExceptViewLatest, test4Items, test4ItemsIndexer, test4UsersIndexer)
+ test(test4AllItypes, 20000, test4ParamsIgnoreAllExceptView, test4Items, test4Users, test4U2i, test4RatingsIgnoreAllExceptViewLatest, genSelectedItems(test4Items), test4ItemsIndexer, test4UsersIndexer, test4RecommendItems)
}
// note: meaning rate action only
@@ -352,13 +502,19 @@
("u1", "i1", "5"))
"DataPreparator with all actions, all itypes, ignore all actions (except Rate) and conflicts=latest" should {
- test(test4AllItypes, test4ParamsIgnoreAll, test4Items, test4Users, test4U2i, test4RatingsIgnoreAllLatest, test4Items, test4ItemsIndexer, test4UsersIndexer)
+ test(test4AllItypes, 20000, test4ParamsIgnoreAll, test4Items, test4Users, test4U2i, test4RatingsIgnoreAllLatest, genSelectedItems(test4Items), test4ItemsIndexer, test4UsersIndexer, test4RecommendItems)
}
val test4ParamsLowest: Map[String, String] = test4Params + ("conflictParam" -> "lowest")
val test4Itypes_t3 = List("t3")
- val test4Items_t3 = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i3", "t3,t4"))
+ val test4Items_t3 = List(
+ test4ItemsMap("i0"),
+ test4ItemsMap("i1"),
+ test4ItemsMap("i3"))
+
+ val test4RecommendItems_t3 = Some(List("i0", "i1", "i3"))
+
val test4RatingsLowest_t3 = List(
("u0", "i0", "2"),
("u0", "i1", "1"),
@@ -367,7 +523,97 @@
("u1", "i1", "1"))
"DataPreparator with only all actions, some itypes, and conflicts=lowest" should {
- test(test4Itypes_t3, test4ParamsLowest, test4Items, test4Users, test4U2i, test4RatingsLowest_t3, test4Items_t3, test4ItemsIndexer, test4UsersIndexer)
+ test(test4Itypes_t3, 20000, test4ParamsLowest, test4Items, test4Users, test4U2i, test4RatingsLowest_t3, genSelectedItems(test4Items_t3), test4ItemsIndexer, test4UsersIndexer, test4RecommendItems_t3)
+ }
+
+ /* test5: test starttime and endtime */
+
+ // starttime, endtime
+ // i0 A |---------|
+ // i1 B |---------|E
+ // i2 C|---------|
+ // i3 |---------|
+ // D F G
+
+ val tA = 123122
+ val tB = 123123
+ val tC = 123457
+ val tD = 123679
+ val tE = 543322
+ val tF = 543654
+ val tG = 543655
+
+ val test5AllItypes = List("t1", "t2", "t3", "t4")
+ val test5ItemsMap = Map(
+ // id, itypes, appid, starttime, ct, endtime
+ "i0" -> ("i0", "t1,t2,t3", appid.toString, "123123", "12345", "543210"),
+ "i1" -> ("i1", "t1,t2", appid.toString, "123456", "12345", "543321"),
+ "i2" -> ("i2", "t2,t3", appid.toString, "123567", "12345", "543432"),
+ "i3" -> ("i3", "t2", appid.toString, "123678", "12345", "543654")
+ )
+
+ val test5Items = List(
+ test5ItemsMap("i0"),
+ test5ItemsMap("i1"),
+ test5ItemsMap("i2"),
+ test5ItemsMap("i3"))
+
+ val test5ItemsIndexer = Map("i0" -> "0", "i1" -> "4", "i2" -> "7", "i3" -> "8") // map iid to index
+
+ val test5Users = List(Tuple1("u0"), Tuple1("u1"), Tuple1("u2"), Tuple1("u3"))
+ val test5UsersIndexer = Map("u0" -> "0", "u1" -> "1", "u2" -> "2", "u3" -> "3") // map uid to index
+
+ val test5U2i = List(
+ (Rate, "u0", "i0", "123450", "4"),
+ (Like, "u0", "i1", "123457", "PIO_NONE"),
+ (Dislike, "u0", "i2", "123458", "PIO_NONE"),
+ (View, "u0", "i3", "123459", "PIO_NONE"), // NOTE: assume v field won't be missing
+ (Rate, "u1", "i0", "123457", "2"),
+ (Conversion, "u1", "i1", "123458", "PIO_NONE"))
+
+ val test5Ratings = List(
+ ("u0", "i0", "4"),
+ ("u0", "i1", "4"),
+ ("u0", "i2", "2"),
+ ("u0", "i3", "1"),
+ ("u1", "i0", "2"),
+ ("u1", "i1", "5"))
+
+ val test5RecommendItems = Some(List("i0", "i1", "i2", "i3"))
+ val test5RecommendItemsEmpty = Some(List())
+ val test5RecommendItemsi0 = Some(List("i0"))
+ val test5RecommendItemsi0i1 = Some(List("i0", "i1"))
+ val test5RecommendItemsi2i3 = Some(List("i2", "i3"))
+
+ val test5Params: Map[String, String] = Map("viewParam" -> "1", "likeParam" -> "4", "dislikeParam" -> "2", "conversionParam" -> "5",
+ "conflictParam" -> "latest")
+
+ "recommendationTime < all item starttime" should {
+ test(test5AllItypes, tA, test5Params, test5Items, test5Users, test5U2i, test5Ratings, genSelectedItems(test5Items), test5ItemsIndexer, test5UsersIndexer, test5RecommendItemsEmpty)
+ }
+
+ "recommendationTime == earliest starttime" should {
+ test(test5AllItypes, tB, test5Params, test5Items, test5Users, test5U2i, test5Ratings, genSelectedItems(test5Items), test5ItemsIndexer, test5UsersIndexer, test5RecommendItemsi0)
+ }
+
+ "recommendationTime > some items starttime" should {
+ test(test5AllItypes, tC, test5Params, test5Items, test5Users, test5U2i, test5Ratings, genSelectedItems(test5Items), test5ItemsIndexer, test5UsersIndexer, test5RecommendItemsi0i1)
+ }
+
+ "recommendationTime > all item starttime and < all item endtime" should {
+ test(test5AllItypes, tD, test5Params, test5Items, test5Users, test5U2i, test5Ratings, genSelectedItems(test5Items), test5ItemsIndexer, test5UsersIndexer, test5RecommendItems)
+ }
+
+ "recommendationTime > some item endtime" should {
+ test(test5AllItypes, tE, test5Params, test5Items, test5Users, test5U2i, test5Ratings, genSelectedItems(test5Items), test5ItemsIndexer, test5UsersIndexer, test5RecommendItemsi2i3)
+ }
+
+ "recommendationTime == last item endtime" should {
+ test(test5AllItypes, tF, test5Params, test5Items, test5Users, test5U2i, test5Ratings, genSelectedItems(test5Items), test5ItemsIndexer, test5UsersIndexer, test5RecommendItemsEmpty)
+ }
+
+ "recommendationTime > last item endtime" should {
+ test(test5AllItypes, tG, test5Params, test5Items, test5Users, test5U2i, test5Ratings, genSelectedItems(test5Items), test5ItemsIndexer, test5UsersIndexer, test5RecommendItemsEmpty)
}
}
diff --git a/process/engines/itemrec/algorithms/hadoop/scalding/mahout/src/test/scala/io/prediction/algorithms/scalding/mahout/itemrec/ModelConstructorTest.scala b/process/engines/itemrec/algorithms/hadoop/scalding/mahout/src/test/scala/io/prediction/algorithms/scalding/mahout/itemrec/ModelConstructorTest.scala
index f0a2d75..6c03f43 100644
--- a/process/engines/itemrec/algorithms/hadoop/scalding/mahout/src/test/scala/io/prediction/algorithms/scalding/mahout/itemrec/ModelConstructorTest.scala
+++ b/process/engines/itemrec/algorithms/hadoop/scalding/mahout/src/test/scala/io/prediction/algorithms/scalding/mahout/itemrec/ModelConstructorTest.scala
@@ -10,14 +10,15 @@
class ModelConstructorTest extends Specification with TupleConversions {
- def test(unseenOnly: Boolean, numRecommendations: Int,
- items: List[(String, String, String)],
+ val appid = 3
+
+ def test(unseenOnly: Boolean, numRecommendations: Int, recommendationTime: Long,
+ items: List[(String, String, String, String, String)], //(iindex, iid, itypes, starttime, endtime)
users: List[(String, String)],
predicted: List[(String, String)],
ratings: List[(String, String, String)],
output: List[(String, String, String, String)]) = {
- val appid = 3
val engineid = 4
val algoid = 7
val evalid = None
@@ -41,6 +42,7 @@
.arg("modelSet", modelSet.toString)
.arg("unseenOnly", unseenOnly.toString)
.arg("numRecommendations", numRecommendations.toString)
+ .arg("recommendationTime", recommendationTime.toString)
.source(Tsv(AlgoFile(hdfsRoot, appid, engineid, algoid, evalid, "predicted.tsv"), new Fields("uindex", "predicted")), predicted)
.source(Csv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "ratings.csv"), ",", new Fields("uindexR", "iindexR", "ratingR")), ratings)
.source(Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "itemsIndex.tsv")), items)
@@ -52,10 +54,108 @@
}
.run
.finish
-
}
- val test1Items = List(("0", "i0", "t1,t2,t3"), ("1", "i1", "t1,t2"), ("2", "i2", "t2,t3"), ("3", "i3", "t2"))
+ def testWithBooleanData(unseenOnly: Boolean, numRecommendations: Int, recommendationTime: Long,
+ items: List[(String, String, String, String, String)], //(iindex, iid, itypes, starttime, endtime)
+ users: List[(String, String)],
+ predicted: List[(String, String)],
+ ratings: List[(String, String, String)],
+ output: List[(String, String, String, String)],
+ booleanData: Boolean) = {
+
+ val engineid = 4
+ val algoid = 7
+ val evalid = None
+ val modelSet = true
+
+ val dbType = "file"
+ val dbName = "testpath/"
+ val dbHost = None
+ val dbPort = None
+ val hdfsRoot = "testroot/"
+
+ val itemRecScores = output map { case (uid, iid, score, itypes) => (uid, iid, score, itypes, algoid, modelSet) }
+
+ JobTest("io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor")
+ .arg("dbType", dbType)
+ .arg("dbName", dbName)
+ .arg("hdfsRoot", hdfsRoot)
+ .arg("appid", appid.toString)
+ .arg("engineid", engineid.toString)
+ .arg("algoid", algoid.toString)
+ .arg("modelSet", modelSet.toString)
+ .arg("unseenOnly", unseenOnly.toString)
+ .arg("numRecommendations", numRecommendations.toString)
+ .arg("recommendationTime", recommendationTime.toString)
+ .arg("booleanData", booleanData.toString)
+ .source(Tsv(AlgoFile(hdfsRoot, appid, engineid, algoid, evalid, "predicted.tsv"), new Fields("uindex", "predicted")), predicted)
+ .source(Csv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "ratings.csv"), ",", new Fields("uindexR", "iindexR", "ratingR")), ratings)
+ .source(Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "itemsIndex.tsv")), items)
+ .source(Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "usersIndex.tsv")), users)
+ .sink[(String, String, String, String, Int, Boolean)](ItemRecScores(dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort, algoid = algoid, modelset = modelSet).getSource) { outputBuffer =>
+ "correctly write model data to a file" in {
+ outputBuffer.toList must containTheSameElementsAs(itemRecScores)
+ }
+ }
+ .run
+ .finish
+ }
+
+ def testWithImplicitFeedback(unseenOnly: Boolean, numRecommendations: Int, recommendationTime: Long,
+ items: List[(String, String, String, String, String)], //(iindex, iid, itypes, starttime, endtime)
+ users: List[(String, String)],
+ predicted: List[(String, String)],
+ ratings: List[(String, String, String)],
+ output: List[(String, String, String, String)],
+ implicitFeedback: Boolean) = {
+
+ val engineid = 4
+ val algoid = 7
+ val evalid = None
+ val modelSet = true
+
+ val dbType = "file"
+ val dbName = "testpath/"
+ val dbHost = None
+ val dbPort = None
+ val hdfsRoot = "testroot/"
+
+ val itemRecScores = output map { case (uid, iid, score, itypes) => (uid, iid, score, itypes, algoid, modelSet) }
+
+ JobTest("io.prediction.algorithms.scalding.mahout.itemrec.ModelConstructor")
+ .arg("dbType", dbType)
+ .arg("dbName", dbName)
+ .arg("hdfsRoot", hdfsRoot)
+ .arg("appid", appid.toString)
+ .arg("engineid", engineid.toString)
+ .arg("algoid", algoid.toString)
+ .arg("modelSet", modelSet.toString)
+ .arg("unseenOnly", unseenOnly.toString)
+ .arg("numRecommendations", numRecommendations.toString)
+ .arg("recommendationTime", recommendationTime.toString)
+ .arg("implicitFeedback", implicitFeedback.toString)
+ .source(Tsv(AlgoFile(hdfsRoot, appid, engineid, algoid, evalid, "predicted.tsv"), new Fields("uindex", "predicted")), predicted)
+ .source(Csv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "ratings.csv"), ",", new Fields("uindexR", "iindexR", "ratingR")), ratings)
+ .source(Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "itemsIndex.tsv")), items)
+ .source(Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "usersIndex.tsv")), users)
+ .sink[(String, String, String, String, Int, Boolean)](ItemRecScores(dbType = dbType, dbName = dbName, dbHost = dbHost, dbPort = dbPort, algoid = algoid, modelset = modelSet).getSource) { outputBuffer =>
+ "correctly write model data to a file" in {
+ outputBuffer.toList must containTheSameElementsAs(itemRecScores)
+ }
+ }
+ .run
+ .finish
+ }
+
+ val noEndtime = "PIO_NONE"
+
+ /* test 1*/
+ val test1Items = List(
+ ("0", "i0", "t1,t2,t3", "12346", noEndtime),
+ ("1", "i1", "t1,t2", "12347", noEndtime),
+ ("2", "i2", "t2,t3", "12348", noEndtime),
+ ("3", "i3", "t2", "12349", noEndtime))
val test1Users = List(("0", "u0"), ("1", "u1"), ("2", "u2"), ("3", "u3"))
@@ -83,39 +183,166 @@
("u1", "i0", "1.2", "[t1,t2,t3]"))
"mahout.itemrec.itembased ModelConstructor with unseenOnly=false and numRecommendations=100" should {
-
- test(false, 100, test1Items, test1Users, test1Predicted, test1Ratings, test1Output)
-
+ test(false, 100, 1234567890, test1Items, test1Users, test1Predicted, test1Ratings, test1Output)
}
"mahout.itemrec.itembased ModelConstructor with unseenOnly=false and numRecommendations=2" should {
-
- test(false, 2, test1Items, test1Users, test1Predicted, test1Ratings, test1Output2)
-
+ test(false, 2, 1234567890, test1Items, test1Users, test1Predicted, test1Ratings, test1Output2)
}
"mahout.itemrec.itembased ModelConstructor with unseenOnly=true and numRecommendations=100" should {
-
- test(true, 100, test1Items, test1Users, test1Predicted, test1Ratings, test1OutputUnseenOnly)
-
+ test(true, 100, 1234567890, test1Items, test1Users, test1Predicted, test1Ratings, test1OutputUnseenOnly)
}
"mahout.itemrec.itembased ModelConstructor with unseenOnly=true and numRecommendations=1" should {
-
- test(true, 1, test1Items, test1Users, test1Predicted, test1Ratings, test1OutputUnseenOnly1)
-
+ test(true, 1, 1234567890, test1Items, test1Users, test1Predicted, test1Ratings, test1OutputUnseenOnly1)
}
"mahout.itemrec.itembased ModelConstructor with unseenOnly=false, numRecommendations=100 and seen items in predicted results" should {
-
- test(false, 100, test1Items, test1Users, test1PredictedWithSeenItems, test1Ratings, test1Output)
-
+ test(false, 100, 1234567890, test1Items, test1Users, test1PredictedWithSeenItems, test1Ratings, test1Output)
}
"mahout.itemrec.itembased ModelConstructor with unseenOnly=true, numRecommendations=100 and seen items in predicted results" should {
+ test(true, 100, 1234567890, test1Items, test1Users, test1PredictedWithSeenItems, test1Ratings, test1OutputUnseenOnly)
+ }
- test(true, 100, test1Items, test1Users, test1PredictedWithSeenItems, test1Ratings, test1OutputUnseenOnly)
+ /* booleanData */
+ "mahout.itemrec.itembased ModelConstructor with unseenOnly=true and numRecommendations=100 and booleanData=true" should {
+ testWithBooleanData(true, 100, 1234567890, test1Items, test1Users, test1Predicted, test1Ratings, test1OutputUnseenOnly, true)
+ }
+ "mahout.itemrec.itembased ModelConstructor with unseenOnly=true and numRecommendations=100 and booleanData=false" should {
+ testWithBooleanData(true, 100, 1234567890, test1Items, test1Users, test1Predicted, test1Ratings, test1OutputUnseenOnly, false)
+ }
+
+ "mahout.itemrec.itembased ModelConstructor with unseenOnly=false and numRecommendations=100 and booleanData=true" should {
+ // should only generate unseen data if booleanData=true although unseenOnly=false
+ testWithBooleanData(false, 100, 1234567890, test1Items, test1Users, test1Predicted, test1Ratings, test1OutputUnseenOnly, true)
+ }
+
+ "mahout.itemrec.itembased ModelConstructor with unseenOnly=false and numRecommendations=100 and booleanData=false" should {
+ testWithBooleanData(false, 100, 1234567890, test1Items, test1Users, test1Predicted, test1Ratings, test1Output, false)
+ }
+
+ /* implicitFeedback */
+ "mahout.itemrec.itembased ModelConstructor with unseenOnly=true and numRecommendations=100 and implicitFeedback=true" should {
+ testWithImplicitFeedback(true, 100, 1234567890, test1Items, test1Users, test1Predicted, test1Ratings, test1OutputUnseenOnly, true)
+ }
+
+ "mahout.itemrec.itembased ModelConstructor with unseenOnly=true and numRecommendations=100 and implicitFeedback=false" should {
+ testWithImplicitFeedback(true, 100, 1234567890, test1Items, test1Users, test1Predicted, test1Ratings, test1OutputUnseenOnly, false)
+ }
+
+ "mahout.itemrec.itembased ModelConstructor with unseenOnly=false and numRecommendations=100 and implicitFeedback=true" should {
+ // should only generate unseen data if implicitFeedback=true although unseenOnly=false
+ testWithImplicitFeedback(false, 100, 1234567890, test1Items, test1Users, test1Predicted, test1Ratings, test1OutputUnseenOnly, true)
+ }
+
+ "mahout.itemrec.itembased ModelConstructor with unseenOnly=false and numRecommendations=100 and implicitFeedback=false" should {
+ testWithImplicitFeedback(false, 100, 1234567890, test1Items, test1Users, test1Predicted, test1Ratings, test1Output, false)
+ }
+
+ /* test 2: test double comparison */
+ val test2Items = List(
+ ("0", "i0", "t1,t2,t3", "12346", noEndtime),
+ ("1", "i1", "t1,t2", "12347", noEndtime),
+ ("2", "i2", "t2,t3", "12348", noEndtime),
+ ("3", "i3", "t2", "12349", noEndtime))
+
+ val test2Users = List(("0", "u0"), ("1", "u1"), ("2", "u2"), ("3", "u3"))
+
+ val test2Predicted = List(("0", "[1:123,2:9]"), ("1", "[0:1]"))
+
+ val test2Ratings = List(("0", "0", "2"), ("0", "3", "88"))
+
+ val test2Output = List(
+ ("u0", "i1,i3,i2,i0", "123.0,88.0,9.0,2.0", "[t1,t2],[t2],[t2,t3],[t1,t2,t3]"),
+ ("u1", "i0", "1.0", "[t1,t2,t3]"))
+
+ "mahout.itemrec.itembased ModelConstructor with unseenOnly=false and numRecommendations=100 (score should not be compared as string)" should {
+
+ test(false, 100, 1234567890, test2Items, test2Users, test2Predicted, test2Ratings, test2Output)
+
+ }
+
+ /* test3: test starttime and endtime */
+
+ // starttime, endtime
+ // i0 A |---------|
+ // i1 B |---------|E
+ // i2 C|---------|
+ // i3 |---------|
+ // D F G
+
+ val tA = 123122
+ val tB = 123123
+ val tC = 123457
+ val tD = 123679
+ val tE = 543322
+ val tF = 543654
+ val tG = 543655
+
+ val test3Items = List(
+ ("0", "i0", "t1,t2,t3", "123123", "543210"),
+ ("1", "i1", "t1,t2", "123456", "543321"),
+ ("2", "i2", "t2,t3", "123567", "543432"),
+ ("3", "i3", "t2", "123678", "543654"))
+
+ val test3Users = List(("0", "u0"), ("1", "u1"), ("2", "u2"), ("3", "u3"))
+
+ val test3Predicted = List(("0", "[1:123,2:9]"), ("1", "[0:1]"))
+
+ val test3Ratings = List(
+ ("0", "0", "2"), ("0", "3", "88"),
+ ("1", "2", "3"),
+ ("2", "3", "4"))
+
+ val test3Output = List(
+ ("u0", "i1,i3,i2,i0", "123.0,88.0,9.0,2.0", "[t1,t2],[t2],[t2,t3],[t1,t2,t3]"),
+ ("u1", "i2,i0", "3.0,1.0", "[t2,t3],[t1,t2,t3]"),
+ ("u2", "i3", "4.0", "[t2]"))
+
+ val test3OutputEmpty = List()
+
+ val test3Outputi0 = List(
+ ("u0", "i0", "2.0", "[t1,t2,t3]"),
+ ("u1", "i0", "1.0", "[t1,t2,t3]"))
+
+ val test3Outputi0i1 = List(
+ ("u0", "i1,i0", "123.0,2.0", "[t1,t2],[t1,t2,t3]"),
+ ("u1", "i0", "1.0", "[t1,t2,t3]"))
+
+ val test3Outputi2i3 = List(
+ ("u0", "i3,i2", "88.0,9.0", "[t2],[t2,t3]"),
+ ("u1", "i2", "3.0", "[t2,t3]"),
+ ("u2", "i3", "4.0", "[t2]"))
+
+ "unseenOnly=false, numRecommendations=100 and recommendationTime < all item starttime" should {
+ test(false, 100, tA, test3Items, test3Users, test3Predicted, test3Ratings, test3OutputEmpty)
+ }
+
+ "unseenOnly=false, numRecommendations=100 and recommendationTime == earliest starttime" should {
+ test(false, 100, tB, test3Items, test3Users, test3Predicted, test3Ratings, test3Outputi0)
+ }
+
+ "unseenOnly=false, numRecommendations=100 and recommendationTime > some items starttime" should {
+ test(false, 100, tC, test3Items, test3Users, test3Predicted, test3Ratings, test3Outputi0i1)
+ }
+
+ "unseenOnly=false, numRecommendations=100 and recommendationTime > all item starttime and < all item endtime" should {
+ test(false, 100, tD, test3Items, test3Users, test3Predicted, test3Ratings, test3Output)
+ }
+
+ "unseenOnly=false, numRecommendations=100 and recommendationTime > some item endtime" should {
+ test(false, 100, tE, test3Items, test3Users, test3Predicted, test3Ratings, test3Outputi2i3)
+ }
+
+ "unseenOnly=false, numRecommendations=100 and recommendationTime == last item endtime" should {
+ test(false, 100, tF, test3Items, test3Users, test3Predicted, test3Ratings, test3OutputEmpty)
+ }
+
+ "unseenOnly=false, numRecommendations=100 and recommendationTime > last item endtime" should {
+ test(false, 100, tG, test3Items, test3Users, test3Predicted, test3Ratings, test3OutputEmpty)
}
}
diff --git a/process/engines/itemrec/algorithms/hadoop/scalding/randomrank/src/main/scala/io/prediction/algorithms/scalding/itemrec/randomrank/RandomRank.scala b/process/engines/itemrec/algorithms/hadoop/scalding/randomrank/src/main/scala/io/prediction/algorithms/scalding/itemrec/randomrank/RandomRank.scala
index 41810a9..3c60755 100644
--- a/process/engines/itemrec/algorithms/hadoop/scalding/randomrank/src/main/scala/io/prediction/algorithms/scalding/itemrec/randomrank/RandomRank.scala
+++ b/process/engines/itemrec/algorithms/hadoop/scalding/randomrank/src/main/scala/io/prediction/algorithms/scalding/itemrec/randomrank/RandomRank.scala
@@ -33,8 +33,8 @@
*
* --itypes: <string separated by white space>. optional. eg "--itypes type1 type2". If no --itypes specified, then ALL itypes will be used.
* --numRecommendations: <int>. number of recommendations to be generated
- *
* --modelSet: <boolean> (true/false). flag to indicate which set
+ * --recommendationTime: <long> (eg. 9876543210). recommend items with starttime <= recommendationTime and endtime > recommendationTime
*
* Example:
* hadoop jar PredictionIO-Process-Hadoop-Scala-assembly-0.1.jar io.prediction.algorithms.scalding.itemrec.randomrank.RandomRank --hdfs --training_dbType mongodb --training_dbName predictionio_appdata --training_dbHost localhost --training_dbPort 27017 --modeldata_dbType mongodb --modeldata_dbName predictionio_modeldata --modeldata_dbHost localhost --modeldata_dbPort 27017 --hdfsRoot predictionio/ --appid 1 --engineid 1 --algoid 18 --modelSet true
@@ -68,6 +68,7 @@
val numRecommendationsArg = args("numRecommendations").toInt
val modelSetArg = args("modelSet").toBoolean
+ val recommendationTimeArg = args("recommendationTime").toLong
/**
* source
@@ -79,7 +80,7 @@
// get items data
val items = Items(appId = trainingAppid, itypes = itypesArg,
- dbType = training_dbTypeArg, dbName = training_dbNameArg, dbHost = training_dbHostArg, dbPort = training_dbPortArg).readData('iidx, 'itypes)
+ dbType = training_dbTypeArg, dbName = training_dbNameArg, dbHost = training_dbHostArg, dbPort = training_dbPortArg).readStartEndtime('iidx, 'itypes, 'starttime, 'endtime)
val users = Users(appId = trainingAppid,
dbType = training_dbTypeArg, dbName = training_dbNameArg, dbHost = training_dbHostArg, dbPort = training_dbPortArg).readData('uid)
@@ -96,7 +97,23 @@
/**
* computation
*/
- val itemsWithKey = items.map(() -> 'itemKey) { u: Unit => 1 }
+ val itemsWithKey = items
+ .filter('starttime, 'endtime) { fields: (Long, Option[Long]) =>
+ // only keep items with valid starttime and endtime
+ val (starttimeI, endtimeI) = fields
+
+ val keepThis: Boolean = (starttimeI, endtimeI) match {
+ case (start, None) => (recommendationTimeArg >= start)
+ case (start, Some(end)) => ((recommendationTimeArg >= start) && (recommendationTimeArg < end))
+ case _ => {
+ assert(false, s"Unexpected item starttime ${starttimeI} and endtime ${endtimeI}")
+ false
+ }
+ }
+ keepThis
+ }
+ .map(() -> 'itemKey) { u: Unit => 1 }
+
val usersWithKey = users.map(() -> 'userKey) { u: Unit => 1 }
val scores = usersWithKey.joinWithSmaller('userKey -> 'itemKey, itemsWithKey)
@@ -106,14 +123,6 @@
// another way to is to do toList then take top n from List. But then it would create an unncessary long List
// for each group first. not sure which way is better.
.groupBy('uid) { _.sortBy('score).reverse.toList[(String, Double, List[String])](('iidx, 'score, 'itypes) -> 'iidsList) }
-
- // this is solely for debug purpose
- /*
- scores.project('uid, 'iidx, 'score)
- .write(scoresFile)
- */
-
- // write modeldata
- scores.then(itemRecScores.writeData('uid, 'iidsList, algoidArg, modelSetArg) _)
+ .then(itemRecScores.writeData('uid, 'iidsList, algoidArg, modelSetArg) _)
}
diff --git a/process/engines/itemrec/algorithms/hadoop/scalding/randomrank/src/test/scala/io/prediction/algorithms/scalding/itemrec/randomrank/RandomRankTest.scala b/process/engines/itemrec/algorithms/hadoop/scalding/randomrank/src/test/scala/io/prediction/algorithms/scalding/itemrec/randomrank/RandomRankTest.scala
index e86c7e1..c6d6c06 100644
--- a/process/engines/itemrec/algorithms/hadoop/scalding/randomrank/src/test/scala/io/prediction/algorithms/scalding/itemrec/randomrank/RandomRankTest.scala
+++ b/process/engines/itemrec/algorithms/hadoop/scalding/randomrank/src/test/scala/io/prediction/algorithms/scalding/itemrec/randomrank/RandomRankTest.scala
@@ -13,7 +13,8 @@
def test(algoid: Int, modelSet: Boolean,
itypes: List[String],
numRecommendations: Int,
- items: List[(String, String)],
+ recommendationTime: Long,
+ items: List[(String, String, String, String, String, String)], // id, itypes, appid, starttime, ct, endtime
users: List[(String, String)],
itemRecScores: List[(String, String, String, String, Int, Boolean)]) = {
@@ -41,6 +42,7 @@
.arg("itypes", itypes)
.arg("numRecommendations", numRecommendations.toString)
.arg("modelSet", modelSet.toString)
+ .arg("recommendationTime", recommendationTime.toString)
.source(Items(appId = appid, itypes = Some(itypes),
dbType = training_dbType, dbName = training_dbName, dbHost = None, dbPort = None).getSource, items)
.source(Users(appId = appid,
@@ -110,23 +112,30 @@
}
- "not generate same order of iid for all uid group" in {
+ if (getIids(itemRecScores).flatMap { x => x }.toSet.size > 1) {
+ // only check this if the iids in itemRecScores are more than 1
+ "not generate same order of iid for all uid group" in {
+ if (!(getIids(outputBuffer.toList).toSet.size > 1)) {
+ println(outputBuffer)
+ println(getIids(outputBuffer.toList).toSet)
+ }
+ getIids(outputBuffer.toList).toSet.size must be_>(1)
- getIids(outputBuffer.toList).toSet.size must be_>(1)
-
+ }
}
"itypes order match the iids order" in {
// extract (iid, itypes) from the output
val itypesList = getItypes(outputBuffer.toList)
- val itemsMap = items.toMap
+ val itemsMap = items.map(x =>
+ (x._1, x)).toMap
// use the iid only and contruct the (iid, itypes)
val expected = getIids(outputBuffer.toList).map(x =>
// x is List of iid
// create the List of item types using the iid
- x.map(x => (x, itemsMap(x).split(",").toList))
+ x.map(x => (x, itemsMap(x)._2.split(",").toList))
)
itypesList must be_==(expected)
@@ -138,12 +147,20 @@
.finish
}
+ val largeNumber: Long = scala.Long.MaxValue // larger than any item starttime
+ val noEndtime = "PIO_NONE"
+
"randomrank.RandomRank with selected itypes" should {
val algoid = 12
val modelSet = false
val itypes = List("t1", "t2")
- val items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val items = List(
+ ("i0", "t1,t2,t3", "19", "123456", "345678", noEndtime),
+ ("i1", "t2,t3", "19", "123457", "567890", noEndtime),
+ ("i2", "t4", "19", "21", "88", noEndtime),
+ ("i3", "t3,t4", "19", "9876543210", "67890", noEndtime))
+
val users = List(("u0", "3"), ("u1", "3"), ("u2", "3"), ("u3", "3"))
val itemRecScores = List(
("u0", "i0,i1", "0.0,0.0", "[t1,t2,t3],[t2,t3]", algoid, modelSet),
@@ -151,7 +168,7 @@
("u2", "i0,i1", "0.0,0.0", "[t1,t2,t3],[t2,t3]", algoid, modelSet),
("u3", "i0,i1", "0.0,0.0", "[t1,t2,t3],[t2,t3]", algoid, modelSet))
- test(algoid, modelSet, itypes, 500, items, users, itemRecScores)
+ test(algoid, modelSet, itypes, 500, largeNumber, items, users, itemRecScores)
}
@@ -160,7 +177,11 @@
val algoid = 12
val modelSet = false
val itypes = List("")
- val items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val items = List(
+ ("i0", "t1,t2,t3", "19", "123456", "345678", noEndtime),
+ ("i1", "t2,t3", "19", "123457", "567890", noEndtime),
+ ("i2", "t4", "19", "21", "88", noEndtime),
+ ("i3", "t3,t4", "19", "9876543210", "67890", noEndtime))
val users = List(("u0", "3"), ("u1", "3"), ("u2", "3"), ("u3", "3"))
val itemRecScores = List(
("u0", "i0,i1,i2,i3", "0.0,0.0,0.0,0.0", "[t1,t2,t3],[t2,t3],[t4],[t3,t4]", algoid, modelSet),
@@ -168,10 +189,92 @@
("u2", "i0,i1,i2,i3", "0.0,0.0,0.0,0.0", "[t1,t2,t3],[t2,t3],[t4],[t3,t4]", algoid, modelSet),
("u3", "i0,i1,i2,i3", "0.0,0.0,0.0,0.0", "[t1,t2,t3],[t2,t3],[t4],[t3,t4]", algoid, modelSet))
- test(algoid, modelSet, itypes, 500, items, users, itemRecScores)
+ test(algoid, modelSet, itypes, 500, largeNumber, items, users, itemRecScores)
}
// TODO: test with smaller number of numRecommendations (but can't know expected result beacause the score is random...)
+ /* test 2: test starttime and endtime */
+ // starttime, endtime
+ // i0 A |---------|
+ // i1 B |---------|E
+ // i2 C|---------|
+ // i3 |---------|
+ // D F G
+
+ val tA = 123122
+ val tB = 123123
+ val tC = 123457
+ val tD = 123679
+ val tE = 543322
+ val tF = 543654
+ val tG = 543655
+
+ val test2Algoid = 12
+ val test2ModelSet = false
+
+ val test2ItypesAll = List("t1", "t2", "t3", "t4")
+ val test2Items = List(
+ ("i0", "t1,t2,t3", "19", "123123", "4", "543210"),
+ ("i1", "t2,t3", "19", "123456", "5", "543321"),
+ ("i2", "t4", "19", "123567", "6", "543432"),
+ ("i3", "t3,t4", "19", "123678", "7", "543654"))
+
+ val test2Users = List(("u0", "3"), ("u1", "3"), ("u2", "3"), ("u3", "3"))
+
+ val test2ItemRecScoresAll = List(
+ ("u0", "i3,i2,i1,i0", "0.0,0.0,0.0,0.0", "[t3,t4],[t4],[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("u1", "i3,i2,i1,i0", "0.0,0.0,0.0,0.0", "[t3,t4],[t4],[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("u2", "i3,i2,i1,i0", "0.0,0.0,0.0,0.0", "[t3,t4],[t4],[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("u3", "i3,i2,i1,i0", "0.0,0.0,0.0,0.0", "[t3,t4],[t4],[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet))
+
+ val test2ItemRecScoresEmpty = List()
+
+ val test2ItemRecScoresi0 = List(
+ ("u0", "i0", "0.0", "[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("u1", "i0", "0.0", "[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("u2", "i0", "0.0", "[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("u3", "i0", "0.0", "[t1,t2,t3]", test2Algoid, test2ModelSet))
+
+ val test2ItemRecScoresi0i1 = List(
+ ("u0", "i1,i0", "0.0,0.0", "[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("u1", "i1,i0", "0.0,0.0", "[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("u2", "i1,i0", "0.0,0.0", "[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("u3", "i1,i0", "0.0,0.0", "[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet))
+
+ val test2ItemRecScoresi2i3 = List(
+ ("u0", "i3,i2", "0.0,0.0", "[t3,t4],[t4]", test2Algoid, test2ModelSet),
+ ("u1", "i3,i2", "0.0,0.0", "[t3,t4],[t4]", test2Algoid, test2ModelSet),
+ ("u2", "i3,i2", "0.0,0.0", "[t3,t4],[t4]", test2Algoid, test2ModelSet),
+ ("u3", "i3,i2", "0.0,0.0", "[t3,t4],[t4]", test2Algoid, test2ModelSet))
+
+ "recommendationTime < all item starttime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tA, test2Items, test2Users, test2ItemRecScoresEmpty)
+ }
+
+ "recommendationTime == earliest starttime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tB, test2Items, test2Users, test2ItemRecScoresi0)
+ }
+
+ "recommendationTime > some items starttime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tC, test2Items, test2Users, test2ItemRecScoresi0i1)
+ }
+
+ "recommendationTime > all item starttime and < all item endtime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tD, test2Items, test2Users, test2ItemRecScoresAll)
+ }
+
+ "recommendationTime > some item endtime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tE, test2Items, test2Users, test2ItemRecScoresi2i3)
+ }
+
+ "recommendationTime == last item endtime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tF, test2Items, test2Users, test2ItemRecScoresEmpty)
+ }
+
+ "recommendationTime > last item endtime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tG, test2Items, test2Users, test2ItemRecScoresEmpty)
+ }
+
}
diff --git a/process/engines/itemrec/evaluations/hadoop/scalding/metrics/map/src/main/scala/io/prediction/metrics/scalding/itemrec/map/MAPAtKDataPreparator.scala b/process/engines/itemrec/evaluations/hadoop/scalding/metrics/map/src/main/scala/io/prediction/metrics/scalding/itemrec/map/MAPAtKDataPreparator.scala
index 0fe5f13..5bb1412 100644
--- a/process/engines/itemrec/evaluations/hadoop/scalding/metrics/map/src/main/scala/io/prediction/metrics/scalding/itemrec/map/MAPAtKDataPreparator.scala
+++ b/process/engines/itemrec/evaluations/hadoop/scalding/metrics/map/src/main/scala/io/prediction/metrics/scalding/itemrec/map/MAPAtKDataPreparator.scala
@@ -120,16 +120,37 @@
// for each user, get a list of items which match the goalParam
// TODO: filter out items appeared in trainingU2i?
val testSetRelevant = testU2i
- .filter('actionTest, 'vTest) { fields: (String, String) =>
+ .filter('actionTest, 'vTest) { fields: (String, Option[String]) =>
val (action, v) = fields
val cond: Boolean = goalParamArg match {
case GOAL_VIEW => (action == ACTION_VIEW)
case GOAL_CONVERSION => (action == ACTION_CONVERSION)
case GOAL_LIKE => (action == ACTION_LIKE)
- case GOAL_RATE3 => (action == ACTION_RATE) && (v.toInt >= 3)
- case GOAL_RATE4 => (action == ACTION_RATE) && (v.toInt >= 4)
- case GOAL_RATE5 => (action == ACTION_RATE) && (v.toInt >= 5)
+ case GOAL_RATE3 => try {
+ (action == ACTION_RATE) && (v.get.toInt >= 3)
+ } catch {
+ case e: Exception => {
+ assert(false, s"Failed to convert v field ${v} to int. Exception:" + e)
+ false
+ }
+ }
+ case GOAL_RATE4 => try {
+ (action == ACTION_RATE) && (v.get.toInt >= 4)
+ } catch {
+ case e: Exception => {
+ assert(false, s"Failed to convert v field ${v} to int. Exception:" + e)
+ false
+ }
+ }
+ case GOAL_RATE5 => try {
+ (action == ACTION_RATE) && (v.get.toInt >= 5)
+ } catch {
+ case e: Exception => {
+ assert(false, s"Failed to convert v field ${v} to int. Exception:" + e)
+ false
+ }
+ }
case _ => {
assert(false, "Invalid goalParam " + goalParamArg + ".")
false
diff --git a/process/engines/itemrec/evaluations/hadoop/scalding/metrics/map/src/test/scala/io/prediction/metrics/scalding/itemrec/map/MAPAtKDataPreparatorTest.scala b/process/engines/itemrec/evaluations/hadoop/scalding/metrics/map/src/test/scala/io/prediction/metrics/scalding/itemrec/map/MAPAtKDataPreparatorTest.scala
index 706b6d2..d69da3c 100644
--- a/process/engines/itemrec/evaluations/hadoop/scalding/metrics/map/src/test/scala/io/prediction/metrics/scalding/itemrec/map/MAPAtKDataPreparatorTest.scala
+++ b/process/engines/itemrec/evaluations/hadoop/scalding/metrics/map/src/test/scala/io/prediction/metrics/scalding/itemrec/map/MAPAtKDataPreparatorTest.scala
@@ -18,8 +18,8 @@
def test(params: Map[String, String],
testU2i: List[(String, String, String, String, String)],
- relevantItems: List[(String, String)], // List(("u0", "i0,i1,i2"), ("u1", "i0,i1,i2"))
- topKItems: List[(String, String)]) = {
+ relevantItems: List[(String, String)] // List(("u0", "i0,i1,i2"), ("u1", "i0,i1,i2"))
+ ) = {
val test_dbType = "file"
val test_dbName = "testsetpath/"
@@ -75,11 +75,6 @@
}
}
- /*.sink[(String, String)](Tsv(OfflineMetricFile(hdfsRoot, 2, 4, 5, 6, 8, "topKItems.tsv"))) { outputBuffer =>
- "correctly generates topKItems for each user" in {
- outputBuffer.toList must containTheSameElementsAs(topKItems)
- }
- }*/
.run
.finish
}
@@ -88,25 +83,27 @@
val testU2i = List(
// u0
- (Rate, "u0", "i0", "123450", "4"),
- (View, "u0", "i1", "123457", "1"),
- (Dislike, "u0", "i2", "123458", "0"),
- (View, "u0", "i3", "123459", "0"),
- (View, "u0", "i7", "123460", "0"),
+ (Rate, "u0", "i0", "123450", "4"),
+ (View, "u0", "i1", "123457", "PIO_NONE"),
+ (Dislike, "u0", "i2", "123458", "PIO_NONE"),
+ (View, "u0", "i3", "123459", "PIO_NONE"),
+ (View, "u0", "i7", "123460", "PIO_NONE"),
+ (Rate, "u0", "i8", "123450", "5"),
// u1
- (View, "u1", "i0", "123457", "2"),
- (Conversion, "u1", "i1", "123458", "0"),
- (Conversion, "u1", "i4", "123457", "0"),
- (Conversion, "u1", "i5", "123456", "0"),
+ (View, "u1", "i0", "123457", "PIO_NONE"),
+ (Conversion, "u1", "i1", "123458", "PIO_NONE"),
+ (Conversion, "u1", "i4", "123457", "PIO_NONE"),
+ (Conversion, "u1", "i5", "123456", "PIO_NONE"),
(Rate, "u1", "i7", "123456", "3"),
(Rate, "u1", "i8", "123454", "3"),
(Rate, "u1", "i9", "123453", "4"),
// u2
- (View, "u2", "i3", "123458", "0"),
- (Conversion, "u2", "i4", "123451", "0"),
- (Conversion, "u2", "i5", "123452", "0"))
+ (View, "u2", "i3", "123458", "PIO_NONE"),
+ (Conversion, "u2", "i4", "123451", "PIO_NONE"),
+ (Conversion, "u2", "i5", "123452", "PIO_NONE"),
+ (Rate, "u2", "i6", "123452", "5"))
"itemrec.map MAPAtKDataPreparator with goal = view" should {
val params = Map("goalParam" -> "view", "kParam" -> "4")
@@ -114,12 +111,8 @@
("u0", "i1,i3,i7"),
("u1", "i0"),
("u2", "i3"))
-
- val topKItems = List(
- ("u0", "i9,i8,i7,i6"),
- ("u1", "i0,i1,i2,i3"))
-
- test(params, testU2i, relevantItems, topKItems)
+
+ test(params, testU2i, relevantItems)
}
"itemrec.map MAPAtKDataPreparator with goal = conversion" should {
@@ -127,12 +120,37 @@
val relevantItems = List(
("u1", "i1,i4,i5"),
("u2", "i4,i5"))
-
- val topKItems = List(
- ("u0", "i9,i8,i7,i6,i5,i4,i3,i2"),
- ("u1", "i0,i1,i2,i3,i4,i5"))
+
+ test(params, testU2i, relevantItems)
+ }
+
+ "itemrec.map MAPAtKDataPreparator with goal = rate >= 3" should {
+ val params = Map("goalParam" -> "rate3", "kParam" -> "8")
+ val relevantItems = List(
+ ("u0", "i0,i8"),
+ ("u1", "i7,i8,i9"),
+ ("u2", "i6"))
- test(params, testU2i, relevantItems, topKItems)
+ test(params, testU2i, relevantItems)
+ }
+
+ "itemrec.map MAPAtKDataPreparator with goal = rate >= 4" should {
+ val params = Map("goalParam" -> "rate4", "kParam" -> "8")
+ val relevantItems = List(
+ ("u0", "i0,i8"),
+ ("u1", "i9"),
+ ("u2", "i6"))
+
+ test(params, testU2i, relevantItems)
+ }
+
+ "itemrec.map MAPAtKDataPreparator with goal = rate >= 5" should {
+ val params = Map("goalParam" -> "rate5", "kParam" -> "8")
+ val relevantItems = List(
+ ("u0", "i8"),
+ ("u2", "i6"))
+
+ test(params, testU2i, relevantItems)
}
}
\ No newline at end of file
diff --git a/process/engines/itemsim/algorithms/hadoop/scalding/itemsimcf/src/main/scala/io/prediction/algorithms/scalding/itemsim/itemsimcf/DataPreparator.scala b/process/engines/itemsim/algorithms/hadoop/scalding/itemsimcf/src/main/scala/io/prediction/algorithms/scalding/itemsim/itemsimcf/DataPreparator.scala
index 08a2f2d..417370b 100644
--- a/process/engines/itemsim/algorithms/hadoop/scalding/itemsimcf/src/main/scala/io/prediction/algorithms/scalding/itemsim/itemsimcf/DataPreparator.scala
+++ b/process/engines/itemsim/algorithms/hadoop/scalding/itemsimcf/src/main/scala/io/prediction/algorithms/scalding/itemsim/itemsimcf/DataPreparator.scala
@@ -114,7 +114,7 @@
// get items data
val items = Items(appId=trainingAppid, itypes=itypesArg,
- dbType=dbTypeArg, dbName=dbNameArg, dbHost=dbHostArg, dbPort=dbPortArg).readData('iidx, 'itypes)
+ dbType=dbTypeArg, dbName=dbNameArg, dbHost=dbHostArg, dbPort=dbPortArg).readStartEndtime('iidx, 'itypes, 'starttime, 'endtime)
val u2i = U2iActions(appId=trainingAppid,
dbType=dbTypeArg, dbName=dbNameArg, dbHost=dbHostArg, dbPort=dbPortArg).readData('action, 'uid, 'iid, 't, 'v)
@@ -132,7 +132,7 @@
* computation
*/
u2i.joinWithSmaller('iid -> 'iidx, items) // only select actions of these items
- .filter('action, 'v) { fields: (String, String) =>
+ .filter('action, 'v) { fields: (String, Option[String]) =>
val (action, v) = fields
val keepThis: Boolean = action match {
@@ -148,12 +148,19 @@
}
keepThis
}
- .map(('action, 'v, 't) -> ('rating, 'tLong)) { fields: (String, String, String) =>
+ .map(('action, 'v, 't) -> ('rating, 'tLong)) { fields: (String, Option[String], String) =>
val (action, v, t) = fields
// convert actions into rating value based on "action" and "v" fields
val rating: Int = action match {
- case ACTION_RATE => v.toInt
+ case ACTION_RATE => try {
+ v.get.toInt
+ } catch {
+ case e: Exception => {
+ assert(false, s"Failed to convert v field ${v} to integer for ${action} action. Exception:" + e)
+ 1
+ }
+ }
case ACTION_LIKE => likeParamArg.getOrElse{
assert(false, "Action type " + action + " should have been filtered out!")
1
@@ -183,10 +190,12 @@
.write(ratingsSink)
// Also store the selected items into DataFile for later model construction usage.
- items.mapTo(('iidx, 'itypes) -> ('iidx, 'itypes)) { fields: (String, List[String]) =>
- val (iidx, itypes) = fields
+ items.mapTo(('iidx, 'itypes, 'starttime, 'endtime) -> ('iidx, 'itypes, 'starttime, 'endtime)) { fields: (String, List[String], Long, Option[Long]) =>
+ val (iidx, itypes, starttime, endtime) = fields
- (iidx, itypes.mkString(",")) // NOTE: convert List[String] into comma-separated String
+ // NOTE: convert List[String] into comma-separated String
+ // NOTE: endtime is optional
+ (iidx, itypes.mkString(","), starttime, endtime.map(_.toString).getOrElse("PIO_NONE"))
}.write(selectedItemsSink)
/**
diff --git a/process/engines/itemsim/algorithms/hadoop/scalding/itemsimcf/src/main/scala/io/prediction/algorithms/scalding/itemsim/itemsimcf/ModelConstructor.scala b/process/engines/itemsim/algorithms/hadoop/scalding/itemsimcf/src/main/scala/io/prediction/algorithms/scalding/itemsim/itemsimcf/ModelConstructor.scala
index 263b461..f792d41 100644
--- a/process/engines/itemsim/algorithms/hadoop/scalding/itemsimcf/src/main/scala/io/prediction/algorithms/scalding/itemsim/itemsimcf/ModelConstructor.scala
+++ b/process/engines/itemsim/algorithms/hadoop/scalding/itemsimcf/src/main/scala/io/prediction/algorithms/scalding/itemsim/itemsimcf/ModelConstructor.scala
@@ -25,7 +25,8 @@
* --engineid: <int>
* --algoid: <int>
* --modelSet: <boolean> (true/false). flag to indicate which set
- *
+ * --recommendationTime: <long> (eg. 9876543210). recommend items with starttime <= recommendationTime and endtime > recommendationTime
+ *
* Optionsl args:
* --dbHost: <string> (eg. "127.0.0.1")
* --dbPort: <int> (eg. 27017)
@@ -58,7 +59,8 @@
val DEBUG_TEST = debugArg.contains("test") // test mode
val modelSetArg = args("modelSet").toBoolean
-
+ val recommendationTimeArg = args("recommendationTime").toLong
+
/**
* input
*/
@@ -66,16 +68,43 @@
.mapTo((0, 1, 2) -> ('iid, 'simiid, 'score)) { fields: (String, String, Double) => fields }
val items = Tsv(DataFile(hdfsRootArg, appidArg, engineidArg, algoidArg, evalidArg, "selectedItems.tsv")).read
- .mapTo((0, 1) -> ('iidx, 'itypes)) { fields: (String, String) =>
- val (iidx, itypes) = fields // itypes are comma-separated String
-
- (iidx, itypes.split(",").toList)
+ .mapTo((0, 1, 2, 3) -> ('iidx, 'itypes, 'starttime, 'endtime)) { fields: (String, String, Long, String) =>
+ val (iidx, itypes, starttime, endtime) = fields // itypes are comma-separated String
+
+ val endtimeOpt: Option[Long] = endtime match {
+ case "PIO_NONE" => None
+ case x: String => {
+ try {
+ Some(x.toLong)
+ } catch {
+ case e: Exception => {
+ assert(false, s"Failed to convert ${x} to Long. Exception: " + e)
+ Some(0)
+ }
+ }
+ }
+ }
+
+ (iidx, itypes.split(",").toList, starttime, endtimeOpt)
}
/**
* process & output
*/
val p = score.joinWithSmaller('simiid -> 'iidx, items) // get items info for each simiid
+ .filter('starttime, 'endtime) { fields: (Long, Option[Long]) =>
+ val (starttimeI, endtimeI) = fields
+
+ val keepThis: Boolean = (starttimeI, endtimeI) match {
+ case (start, None) => (recommendationTimeArg >= start)
+ case (start, Some(end)) => ((recommendationTimeArg >= start) && (recommendationTimeArg < end))
+ case _ => {
+ assert(false, s"Unexpected item starttime ${starttimeI} and endtime ${endtimeI}")
+ false
+ }
+ }
+ keepThis
+ }
.project('iid, 'simiid, 'score, 'itypes)
.groupBy('iid) { _.sortBy('score).reverse.toList[(String, Double, List[String])](('simiid, 'score, 'itypes) -> 'simiidsList) }
diff --git a/process/engines/itemsim/algorithms/hadoop/scalding/itemsimcf/src/test/scala/io/prediction/algorithms/scalding/itemsim/itemsimcf/DataPreparatorTest.scala b/process/engines/itemsim/algorithms/hadoop/scalding/itemsimcf/src/test/scala/io/prediction/algorithms/scalding/itemsim/itemsimcf/DataPreparatorTest.scala
index ccebbf6..5d93031 100644
--- a/process/engines/itemsim/algorithms/hadoop/scalding/itemsimcf/src/test/scala/io/prediction/algorithms/scalding/itemsim/itemsimcf/DataPreparatorTest.scala
+++ b/process/engines/itemsim/algorithms/hadoop/scalding/itemsimcf/src/test/scala/io/prediction/algorithms/scalding/itemsim/itemsimcf/DataPreparatorTest.scala
@@ -15,10 +15,15 @@
val View = "view"
//val ViewDetails = "viewDetails"
val Conversion = "conversion"
-
+
+ val appid = 2
+
def test(itypes: List[String], params: Map[String, String],
- items: List[(String, String)], u2iActions: List[(String, String, String, String, String)],
- ratings: List[(String, String, Int)], selectedItems: List[(String, String)]) = {
+ items: List[(String, String, String, String, String, String)], // id, itypes, appid, starttime, ct, endtime
+ u2iActions: List[(String, String, String, String, String)],
+ ratings: List[(String, String, Int)],
+ selectedItems: List[(String, String, String, String)] // id, itypes, starttime, endtime
+ ) = {
val dbType = "file"
val dbName = "testpath/"
@@ -32,7 +37,7 @@
//.arg("dbHost", dbHost.get)
//.arg("dbPort", dbPort.get.toString)
.arg("hdfsRoot", hdfsRoot)
- .arg("appid", "2")
+ .arg("appid", appid.toString)
.arg("engineid", "4")
.arg("algoid", "5")
.arg("itypes", itypes)
@@ -42,14 +47,14 @@
.arg("conversionParam", params("conversionParam"))
.arg("conflictParam", params("conflictParam"))
//.arg("debug", List("test")) // NOTE: test mode
- .source(Items(appId=2, itypes=Some(itypes), dbType=dbType, dbName=dbName, dbHost=dbHost, dbPort=dbPort).getSource, items)
- .source(U2iActions(appId=2, dbType=dbType, dbName=dbName, dbHost=dbHost, dbPort=dbPort).getSource, u2iActions)
+ .source(Items(appId=appid, itypes=Some(itypes), dbType=dbType, dbName=dbName, dbHost=dbHost, dbPort=dbPort).getSource, items)
+ .source(U2iActions(appId=appid, dbType=dbType, dbName=dbName, dbHost=dbHost, dbPort=dbPort).getSource, u2iActions)
.sink[(String, String, Int)](Tsv(DataFile(hdfsRoot, 2, 4, 5, None, "ratings.tsv"))) { outputBuffer =>
"correctly process and write data to ratings.tsv" in {
outputBuffer.toList must containTheSameElementsAs(ratings)
}
}
- .sink[(String, String)](Tsv(DataFile(hdfsRoot, 2, 4, 5, None, "selectedItems.tsv"))) { outputBuffer =>
+ .sink[(String, String, String, String)](Tsv(DataFile(hdfsRoot, 2, 4, 5, None, "selectedItems.tsv"))) { outputBuffer =>
"correctly write selectedItems.tsv" in {
outputBuffer.toList must containTheSameElementsAs(selectedItems)
}
@@ -61,8 +66,11 @@
/** no itypes specified */
def testWithoutItypes(params: Map[String, String],
- items: List[(String, String)], u2iActions: List[(String, String, String, String, String)],
- ratings: List[(String, String, Int)], selectedItems: List[(String, String)]) = {
+ items: List[(String, String, String, String, String, String)], // id, itypes, appid, starttime, ct, endtime
+ u2iActions: List[(String, String, String, String, String)],
+ ratings: List[(String, String, Int)],
+ selectedItems: List[(String, String, String, String)] // id, itypes, starttime, endtime
+ ) = {
val dbType = "file"
val dbName = "testpath/"
@@ -76,7 +84,7 @@
//.arg("dbHost", dbHost.get)
//.arg("dbPort", dbPort.get.toString)
.arg("hdfsRoot", hdfsRoot)
- .arg("appid", "2")
+ .arg("appid", appid.toString)
.arg("engineid", "4")
.arg("algoid", "5")
//.arg("itypes", itypes) // NOTE: no itypes args!
@@ -86,14 +94,14 @@
.arg("conversionParam", params("conversionParam"))
.arg("conflictParam", params("conflictParam"))
//.arg("debug", List("test")) // NOTE: test mode
- .source(Items(appId=2, itypes=None, dbType=dbType, dbName=dbName, dbHost=dbHost, dbPort=dbPort).getSource, items)
- .source(U2iActions(appId=2, dbType=dbType, dbName=dbName, dbHost=dbHost, dbPort=dbPort).getSource, u2iActions)
+ .source(Items(appId=appid, itypes=None, dbType=dbType, dbName=dbName, dbHost=dbHost, dbPort=dbPort).getSource, items)
+ .source(U2iActions(appId=appid, dbType=dbType, dbName=dbName, dbHost=dbHost, dbPort=dbPort).getSource, u2iActions)
.sink[(String, String, Int)](Tsv(DataFile(hdfsRoot, 2, 4, 5, None, "ratings.tsv"))) { outputBuffer =>
"correctly process and write data to ratings.tsv" in {
outputBuffer.toList must containTheSameElementsAs(ratings)
}
}
- .sink[(String, String)](Tsv(DataFile(hdfsRoot, 2, 4, 5, None, "selectedItems.tsv"))) { outputBuffer =>
+ .sink[(String, String, String, String)](Tsv(DataFile(hdfsRoot, 2, 4, 5, None, "selectedItems.tsv"))) { outputBuffer =>
"correctly write selectedItems.tsv" in {
outputBuffer.toList must containTheSameElementsAs(selectedItems)
}
@@ -103,11 +111,31 @@
}
+ val noEndtime = "PIO_NONE"
/**
* Test 1. basic. Rate actions only without conflicts
*/
val test1AllItypes = List("t1", "t2", "t3", "t4")
- val test1Items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test1ItemsMap = Map(
+ // id, itypes, appid, starttime, ct, endtime
+ "i0" -> ("i0", "t1,t2,t3", appid.toString, "12345", "12346", noEndtime),
+ "i1" -> ("i1", "t2,t3", appid.toString, "12347", "12348", noEndtime),
+ "i2" -> ("i2", "t4", appid.toString, "12349", "12350", noEndtime),
+ "i3" -> ("i3", "t3,t4", appid.toString, "12351", "12352", noEndtime))
+
+ val test1Items = List(
+ test1ItemsMap("i0"),
+ test1ItemsMap("i1"),
+ test1ItemsMap("i2"),
+ test1ItemsMap("i3"))
+
+ def genSelectedItems(items: List[(String, String, String, String, String, String)]) = {
+ items map { x =>
+ val (id, itypes, appid, starttime, ct, endtime) = x
+ (id, itypes, starttime, endtime)
+ }
+ }
+
val test1U2i = List(
(Rate, "u0", "i0", "123450", "3"),
(Rate, "u0", "i1", "123457", "1"),
@@ -128,18 +156,30 @@
"conflictParam" -> "latest")
"itemsim.itemsimcf DataPreparator with only rate actions, all itypes, no conflict" should {
- test(test1AllItypes, test1Params, test1Items, test1U2i, test1Ratings, test1Items)
+ test(test1AllItypes, test1Params, test1Items, test1U2i, test1Ratings, genSelectedItems(test1Items))
}
"itemsim.itemsimcf DataPreparator with only rate actions, no itypes specified, no conflict" should {
- testWithoutItypes(test1Params, test1Items, test1U2i, test1Ratings, test1Items)
+ testWithoutItypes(test1Params, test1Items, test1U2i, test1Ratings, genSelectedItems(test1Items))
}
/**
* Test 2. rate actions only with conflicts
*/
val test2AllItypes = List("t1", "t2", "t3", "t4")
- val test2Items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test2ItemsMap = Map(
+ // id, itypes, appid, starttime, ct, endtime
+ "i0" -> ("i0", "t1,t2,t3", appid.toString, "12345", "12346", noEndtime),
+ "i1" -> ("i1", "t2,t3", appid.toString, "12347", "12348", noEndtime),
+ "i2" -> ("i2", "t4", appid.toString, "12349", "12350", noEndtime),
+ "i3" -> ("i3", "t3,t4", appid.toString, "12351", "12352", noEndtime))
+
+ val test2Items = List(
+ test2ItemsMap("i0"),
+ test2ItemsMap("i1"),
+ test2ItemsMap("i2"),
+ test2ItemsMap("i3"))
+
val test2U2i = List(
(Rate, "u0", "i0", "123448", "3"),
(Rate, "u0", "i0", "123449", "4"), // highest
@@ -186,7 +226,10 @@
("u1", "i1", 3))
val test2Itypes_t1t4 = List("t1", "t4")
- val test2Items_t1t4 = List(("i0", "t1,t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test2Items_t1t4 = List(
+ test2ItemsMap("i0"),
+ test2ItemsMap("i2"),
+ test2ItemsMap("i3"))
val test2RatingsHighest_t1t4 = List(
("u0", "i0", 4),
("u0", "i2", 5),
@@ -199,33 +242,45 @@
val test2ParamsLowest = test2Params + ("conflictParam" -> "lowest")
"itemsim.itemsimcf DataPreparator with only rate actions, all itypes, conflict=latest" should {
- test(test2AllItypes, test2Params, test2Items, test2U2i, test2RatingsLatest, test2Items)
+ test(test2AllItypes, test2Params, test2Items, test2U2i, test2RatingsLatest, genSelectedItems(test2Items))
}
"itemsim.itemsimcf DataPreparator with only rate actions, all itypes, conflict=highest" should {
- test(test2AllItypes, test2ParamsHighest, test2Items, test2U2i, test2RatingsHighest, test2Items)
+ test(test2AllItypes, test2ParamsHighest, test2Items, test2U2i, test2RatingsHighest, genSelectedItems(test2Items))
}
"itemsim.itemsimcf DataPreparator with only rate actions, all itypes, conflict=lowest" should {
- test(test2AllItypes, test2ParamsLowest, test2Items, test2U2i, test2RatingsLowest, test2Items)
+ test(test2AllItypes, test2ParamsLowest, test2Items, test2U2i, test2RatingsLowest, genSelectedItems(test2Items))
}
"itemsim.itemsimcf DataPreparator with only rate actions, some itypes, conflict=highest" should {
- test(test2Itypes_t1t4, test2ParamsHighest, test2Items, test2U2i, test2RatingsHighest_t1t4, test2Items_t1t4)
+ test(test2Itypes_t1t4, test2ParamsHighest, test2Items, test2U2i, test2RatingsHighest_t1t4, genSelectedItems(test2Items_t1t4))
}
/**
* Test 3. Different Actions without conflicts
*/
val test3AllItypes = List("t1", "t2", "t3", "t4")
- val test3Items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test3ItemsMap = Map(
+ // id, itypes, appid, starttime, ct, endtime
+ "i0" -> ("i0", "t1,t2,t3", appid.toString, "12345", "12346", "56789"),
+ "i1" -> ("i1", "t2,t3", appid.toString, "12347", "12348", noEndtime),
+ "i2" -> ("i2", "t4", appid.toString, "12349", "12350", "56790"),
+ "i3" -> ("i3", "t3,t4", appid.toString, "12351", "12352", noEndtime))
+
+ val test3Items = List(
+ test3ItemsMap("i0"),
+ test3ItemsMap("i1"),
+ test3ItemsMap("i2"),
+ test3ItemsMap("i3"))
+
val test3U2i = List(
(Rate, "u0", "i0", "123450", "4"),
- (Like, "u0", "i1", "123457", "3"),
- (Dislike, "u0", "i2", "123458", "3"),
- (View, "u0", "i3", "123459", "0"), // NOTE: assume v field won't be missing
+ (Like, "u0", "i1", "123457", "PIO_NONE"),
+ (Dislike, "u0", "i2", "123458", "PIO_NONE"),
+ (View, "u0", "i3", "123459", "PIO_NONE"), // NOTE: assume v field won't be missing
(Rate, "u1", "i0", "123457", "2"),
- (Conversion, "u1", "i1", "123458", "0"))
+ (Conversion, "u1", "i1", "123458", "PIO_NONE"))
val test3Ratings = List(
("u0", "i0", 4),
@@ -239,7 +294,7 @@
"conflictParam" -> "latest")
"itemsim.itemsimcf DataPreparator with only all actions, all itypes, no conflict" should {
- test(test3AllItypes, test3Params, test3Items, test3U2i, test3Ratings, test3Items)
+ test(test3AllItypes, test3Params, test3Items, test3U2i, test3Ratings, genSelectedItems(test3Items))
}
/**
@@ -249,27 +304,39 @@
"conflictParam" -> "latest")
val test4AllItypes = List("t1", "t2", "t3", "t4")
- val test4Items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test4ItemsMap = Map(
+ // id, itypes, appid, starttime, ct, endtime
+ "i0" -> ("i0", "t1,t2,t3", appid.toString, "12345", "12346", "56789"),
+ "i1" -> ("i1", "t2,t3", appid.toString, "12347", "12348", noEndtime),
+ "i2" -> ("i2", "t4", appid.toString, "12349", "12350", "56790"),
+ "i3" -> ("i3", "t3,t4", appid.toString, "12351", "12352", noEndtime))
+
+ val test4Items = List(
+ test4ItemsMap("i0"),
+ test4ItemsMap("i1"),
+ test4ItemsMap("i2"),
+ test4ItemsMap("i3"))
+
val test4U2i = List(
(Rate, "u0", "i0", "123448", "3"),
- (View, "u0", "i0", "123449", "4"), // lowest (2)
- (Like, "u0", "i0", "123451", "0"), // latest, highest (5)
- (Conversion, "u0", "i0", "123450", "1"),
+ (View, "u0", "i0", "123449", "PIO_NONE"), // lowest (2)
+ (Like, "u0", "i0", "123451", "PIO_NONE"), // latest, highest (5)
+ (Conversion, "u0", "i0", "123450", "PIO_NONE"),
(Rate, "u0", "i1", "123456", "1"), // lowest
(Rate, "u0", "i1", "123457", "4"), // highest
- (View, "u0", "i1", "123458", "3"), // latest (2)
+ (View, "u0", "i1", "123458", "PIO_NONE"), // latest (2)
- (Conversion, "u0", "i2", "123461", "2"), // latest, highest (4)
+ (Conversion, "u0", "i2", "123461", "PIO_NONE"), // latest, highest (4)
(Rate, "u0", "i2", "123459", "3"),
- (View, "u0", "i2", "123460", "5"), // lowest
+ (View, "u0", "i2", "123460", "PIO_NONE"), // lowest
(Rate, "u0", "i3", "123459", "2"),
- (View, "u1", "i0", "123457", "5"), // (2)
+ (View, "u1", "i0", "123457", "PIO_NONE"), // (2)
(Rate, "u1", "i1", "123458", "5"), // highest
- (Conversion, "u1", "i1", "123459", "4"), // (4)
- (Dislike, "u1", "i1", "123460", "1")) // latest, lowest (1)
+ (Conversion, "u1", "i1", "123459", "PIO_NONE"), // (4)
+ (Dislike, "u1", "i1", "123460", "PIO_NONE")) // latest, lowest (1)
val test4RatingsLatest = List(
("u0", "i0", 5),
@@ -280,7 +347,7 @@
("u1", "i1", 1))
"itemsim.itemsimcf DataPreparator with all actions, all itypes, and conflicts=latest" should {
- test(test4AllItypes, test4Params, test4Items, test4U2i, test4RatingsLatest, test4Items)
+ test(test4AllItypes, test4Params, test4Items, test4U2i, test4RatingsLatest, genSelectedItems(test4Items))
}
val test4ParamsIgnoreView = test4Params + ("viewParam" -> "ignore")
@@ -293,7 +360,7 @@
("u1", "i1", 1))
"itemsim.itemsimcf DataPreparator with all actions, all itypes, ignore View actions and conflicts=latest" should {
- test(test4AllItypes, test4ParamsIgnoreView, test4Items, test4U2i, test4RatingsIgnoreViewLatest, test4Items)
+ test(test4AllItypes, test4ParamsIgnoreView, test4Items, test4U2i, test4RatingsIgnoreViewLatest, genSelectedItems(test4Items))
}
// note: currently rate action can't be ignored
@@ -308,7 +375,7 @@
("u1", "i1", 5))
"itemsim.itemsimcf DataPreparator with all actions, all itypes, ignore all actions except View (and Rate) and conflicts=latest" should {
- test(test4AllItypes, test4ParamsIgnoreAllExceptView, test4Items, test4U2i, test4RatingsIgnoreAllExceptViewLatest, test4Items)
+ test(test4AllItypes, test4ParamsIgnoreAllExceptView, test4Items, test4U2i, test4RatingsIgnoreAllExceptViewLatest, genSelectedItems(test4Items))
}
// note: meaning rate action only
@@ -322,13 +389,17 @@
("u1", "i1", 5))
"itemsim.itemsimcf DataPreparator with all actions, all itypes, ignore all actions (except Rate) and conflicts=latest" should {
- test(test4AllItypes, test4ParamsIgnoreAll, test4Items, test4U2i, test4RatingsIgnoreAllLatest, test4Items)
+ test(test4AllItypes, test4ParamsIgnoreAll, test4Items, test4U2i, test4RatingsIgnoreAllLatest, genSelectedItems(test4Items))
}
val test4ParamsLowest: Map[String, String] = test4Params + ("conflictParam" -> "lowest")
val test4Itypes_t3 = List("t3")
- val test4Items_t3 = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i3", "t3,t4"))
+ val test4Items_t3 = List(
+ test4ItemsMap("i0"),
+ test4ItemsMap("i1"),
+ test4ItemsMap("i3"))
+
val test4RatingsLowest_t3 = List(
("u0", "i0", 2),
("u0", "i1", 1),
@@ -337,7 +408,7 @@
("u1", "i1", 1))
"itemsim.itemsimcf DataPreparator with all actions, some itypes, and conflicts=lowest" should {
- test(test4Itypes_t3, test4ParamsLowest, test4Items, test4U2i, test4RatingsLowest_t3, test4Items_t3)
+ test(test4Itypes_t3, test4ParamsLowest, test4Items, test4U2i, test4RatingsLowest_t3, genSelectedItems(test4Items_t3))
}
diff --git a/process/engines/itemsim/algorithms/hadoop/scalding/itemsimcf/src/test/scala/io/prediction/algorithms/scalding/itemsim/itemsimcf/ModelConstructorTest.scala b/process/engines/itemsim/algorithms/hadoop/scalding/itemsimcf/src/test/scala/io/prediction/algorithms/scalding/itemsim/itemsimcf/ModelConstructorTest.scala
index 2138405..c5dfbe9 100644
--- a/process/engines/itemsim/algorithms/hadoop/scalding/itemsimcf/src/test/scala/io/prediction/algorithms/scalding/itemsim/itemsimcf/ModelConstructorTest.scala
+++ b/process/engines/itemsim/algorithms/hadoop/scalding/itemsimcf/src/test/scala/io/prediction/algorithms/scalding/itemsim/itemsimcf/ModelConstructorTest.scala
@@ -8,21 +8,25 @@
import io.prediction.commons.scalding.modeldata.ItemSimScores
class ModelConstructorTest extends Specification with TupleConversions {
- "ItemSim ModelConstructor in test mode" should {
+
+ def test(recommendationTime: Long,
+ items: List[(String, String, String, String)], //iid, itypes, starttime, endtime
+ itemSimScores: List[(String, String, String)],
+ output: List[(String, String, String, String)]) = {
+
val appid = 3
val engineid = 4
val algoid = 7
val modelSet = true
- val test1ItemSimScores = List(("i0", "i1", "0.123"), ("i0", "i2", "0.456"), ("i1", "i0", "1.23"))
- val test1Items = List(("i0", "t1,t2,t3"), ("i1", "t1,t2"), ("i2", "t2,t3"))
- val test1Output = List(("i0", "i2,i1", "0.456,0.123", "[t2,t3],[t1,t2]", algoid, modelSet),("i1", "i0", "1.23", "[t1,t2,t3]", algoid, modelSet))
val dbType = "file"
val dbName = "testpath/"
val dbHost = None
val dbPort = None
val hdfsRoot = "testroot/"
-
+
+ val outputItemSimScores = output map { case (uid, iid, score, itypes) => (uid, iid, score, itypes, algoid, modelSet) }
+
JobTest("io.prediction.algorithms.scalding.itemsim.itemsimcf.ModelConstructor")
.arg("dbType", dbType)
.arg("dbName", dbName)
@@ -31,15 +35,108 @@
.arg("engineid", engineid.toString)
.arg("algoid", algoid.toString)
.arg("modelSet", modelSet.toString)
+ .arg("recommendationTime", recommendationTime.toString)
//.arg("debug", "test") // NOTE: test mode
- .source(Tsv(AlgoFile(hdfsRoot, appid, engineid, algoid, None, "itemSimScores.tsv")), test1ItemSimScores)
- .source(Tsv(DataFile(hdfsRoot, appid, engineid, algoid, None, "selectedItems.tsv")), test1Items)
+ .source(Tsv(AlgoFile(hdfsRoot, appid, engineid, algoid, None, "itemSimScores.tsv")), itemSimScores)
+ .source(Tsv(DataFile(hdfsRoot, appid, engineid, algoid, None, "selectedItems.tsv")), items)
.sink[(String, String, String, String, Int, Boolean)](ItemSimScores(dbType=dbType, dbName=dbName, dbHost=dbHost, dbPort=dbPort, algoid=algoid, modelset=modelSet).getSource) { outputBuffer =>
"correctly write model data to a file" in {
- outputBuffer.toList must containTheSameElementsAs(test1Output)
+ outputBuffer.toList must containTheSameElementsAs(outputItemSimScores)
}
}
.run
.finish
}
+
+ val largeNumber = 1234567890 // larger than any item starttime
+ val noEndtime = "PIO_NONE"
+
+ /* test 1 */
+ val test1ItemSimScores = List(("i0", "i1", "0.123"), ("i0", "i2", "0.456"), ("i1", "i0", "1.23"))
+ val test1Items = List(
+ ("i0", "t1,t2,t3", "12346", noEndtime),
+ ("i1", "t1,t2", "12347", noEndtime),
+ ("i2", "t2,t3", "12348", noEndtime))
+ val test1Output = List(
+ ("i0", "i2,i1", "0.456,0.123", "[t2,t3],[t1,t2]"),
+ ("i1", "i0", "1.23", "[t1,t2,t3]"))
+
+ "ItemSim ModelConstructor in test mode" should {
+ test(largeNumber, test1Items, test1ItemSimScores, test1Output)
+ }
+
+ /* test 2: test starttime and endtime */
+ // starttime, endtime
+ // i0 A |---------|
+ // i1 B |---------|E
+ // i2 C|---------|
+ // i3 |---------|
+ // D F G
+
+ val tA = 123122
+ val tB = 123123
+ val tC = 123457
+ val tD = 123679
+ val tE = 543322
+ val tF = 543654
+ val tG = 543655
+
+ val test2ItemSimScores = List(
+ ("i0", "i1", "0.123"),
+ ("i0", "i2", "0.456"),
+ ("i0", "i3", "0.2"),
+ ("i1", "i0", "12"),
+ ("i1", "i2", "2"))
+
+ val test2Items = List(
+ ("i0", "t1,t2,t3", "123123", "543210"),
+ ("i1", "t1,t2", "123456", "543321"),
+ ("i2", "t2,t3", "123567", "543432"),
+ ("i3", "t2", "123678", "543654"))
+
+ val test2Output = List(
+ ("i0", "i2,i3,i1", "0.456,0.2,0.123", "[t2,t3],[t2],[t1,t2]"),
+ ("i1", "i0,i2", "12.0,2.0", "[t1,t2,t3],[t2,t3]"))
+
+ val test2OutputEmpty = List()
+
+ val test2Outputi0 = List(
+ ("i1", "i0", "12.0", "[t1,t2,t3]"))
+
+ val test2Outputi0i1 = List(
+ ("i0", "i1", "0.123", "[t1,t2]"),
+ ("i1", "i0", "12.0", "[t1,t2,t3]"))
+
+ val test2Outputi2i3 = List(
+ ("i0", "i2,i3", "0.456,0.2", "[t2,t3],[t2]"),
+ ("i1", "i2", "2.0", "[t2,t3]"))
+
+ "recommendationTime < all item starttime" should {
+ test(tA, test2Items, test2ItemSimScores, test2OutputEmpty)
+ }
+
+ "recommendationTime == earliest starttime" should {
+ test(tB, test2Items, test2ItemSimScores, test2Outputi0)
+ }
+
+ "recommendationTime > some items starttime" should {
+ test(tC, test2Items, test2ItemSimScores, test2Outputi0i1)
+ }
+
+ "recommendationTime > all item starttime and < all item endtime" should {
+ test(tD, test2Items, test2ItemSimScores, test2Output)
+ }
+
+ "recommendationTime > some item endtime" should {
+ test(tE, test2Items, test2ItemSimScores, test2Outputi2i3)
+ }
+
+ "recommendationTime == last item endtime" should {
+ test(tF, test2Items, test2ItemSimScores, test2OutputEmpty)
+ }
+
+ "recommendationTime > last item endtime" should {
+ test(tG, test2Items, test2ItemSimScores, test2OutputEmpty)
+ }
+
}
diff --git a/process/engines/itemsim/algorithms/hadoop/scalding/latestrank/src/main/scala/io/prediction/algorithms/scalding/itemsim/latestrank/LatestRank.scala b/process/engines/itemsim/algorithms/hadoop/scalding/latestrank/src/main/scala/io/prediction/algorithms/scalding/itemsim/latestrank/LatestRank.scala
index 18df2b1..f22fbfc 100644
--- a/process/engines/itemsim/algorithms/hadoop/scalding/latestrank/src/main/scala/io/prediction/algorithms/scalding/itemsim/latestrank/LatestRank.scala
+++ b/process/engines/itemsim/algorithms/hadoop/scalding/latestrank/src/main/scala/io/prediction/algorithms/scalding/itemsim/latestrank/LatestRank.scala
@@ -35,6 +35,7 @@
* --numSimilarItems: <int>. number of similar items to be generated
*
* --modelSet: <boolean> (true/false). flag to indicate which set
+ * --recommendationTime: <long> (eg. 9876543210). recommend items with starttime <= recommendationTime and endtime > recommendationTime
*
* Example:
* hadoop jar PredictionIO-Process-Hadoop-Scala-assembly-0.1.jar io.prediction.algorithms.scalding.itemsim.latestrank.LatestRank --hdfs --training_dbType mongodb --training_dbName predictionio_appdata --training_dbHost localhost --training_dbPort 27017 --modeldata_dbType mongodb --modeldata_dbName predictionio_modeldata --modeldata_dbHost localhost --modeldata_dbPort 27017 --hdfsRoot predictionio/ --appid 1 --engineid 1 --algoid 18 --modelSet true
@@ -68,6 +69,7 @@
val numSimilarItemsArg = args("numSimilarItems").toInt
val modelSetArg = args("modelSet").toBoolean
+ val recommendationTimeArg = args("recommendationTime").toLong
/**
* source
@@ -85,8 +87,22 @@
dbName=training_dbNameArg,
dbHost=training_dbHostArg,
dbPort=training_dbPortArg)
- .readStarttime('iidx, 'itypes, 'starttime)
- .map('starttime -> 'score) { t: String => t.toDouble }
+ .readStartEndtime('iidx, 'itypes, 'starttime, 'endtime)
+ .filter('starttime, 'endtime) { fields: (Long, Option[Long]) =>
+ // only keep items with valid starttime and endtime
+ val (starttimeI, endtimeI) = fields
+
+ val keepThis: Boolean = (starttimeI, endtimeI) match {
+ case (start, None) => (recommendationTimeArg >= start)
+ case (start, Some(end)) => ((recommendationTimeArg >= start) && (recommendationTimeArg < end))
+ case _ => {
+ assert(false, s"Unexpected item starttime ${starttimeI} and endtime ${endtimeI}")
+ false
+ }
+ }
+ keepThis
+ }
+ .map('starttime -> 'score) { t: Long => t.toDouble }
.groupBy('iidx) { _.sortBy('score).reverse.take(numSimilarItemsArg + 1) }
val items = Items(
diff --git a/process/engines/itemsim/algorithms/hadoop/scalding/latestrank/src/test/scala/io/prediction/algorithms/scalding/itemsim/latestrank/LatestRankTest.scala b/process/engines/itemsim/algorithms/hadoop/scalding/latestrank/src/test/scala/io/prediction/algorithms/scalding/itemsim/latestrank/LatestRankTest.scala
index 25c17f7..4b542b3 100644
--- a/process/engines/itemsim/algorithms/hadoop/scalding/latestrank/src/test/scala/io/prediction/algorithms/scalding/itemsim/latestrank/LatestRankTest.scala
+++ b/process/engines/itemsim/algorithms/hadoop/scalding/latestrank/src/test/scala/io/prediction/algorithms/scalding/itemsim/latestrank/LatestRankTest.scala
@@ -14,7 +14,8 @@
modelSet: Boolean,
itypes: List[String],
numSimilarItems: Int,
- items: List[(String, String, String, String)],
+ recommendationTime: Long,
+ items: List[(String, String, String, String, String, String)], // id, itypes, appid, starttime, ct, endtime
itemSimScores: List[(String, String, String, String, Int, Boolean)]) = {
val training_dbType = "file"
val training_dbName = "testpath/"
@@ -40,6 +41,7 @@
.arg("itypes", itypes)
.arg("numSimilarItems", numSimilarItems.toString)
.arg("modelSet", modelSet.toString)
+ .arg("recommendationTime", recommendationTime.toString)
.source(Items(appId=appid, itypes=Some(itypes), dbType=training_dbType, dbName=training_dbName, dbHost=None, dbPort=None).getSource, items)
.sink[(String, String, String, String, Int, Boolean)](ItemSimScores(dbType=modeldata_dbType, dbName=modeldata_dbName, dbHost=None, dbPort=None, algoid=algoid, modelset=modelSet).getSource) { outputBuffer =>
"correctly write ItemSimScores" in {
@@ -58,11 +60,19 @@
.finish
}
+ val largeNumber: Long = scala.Long.MaxValue // larger than any item starttime
+ val noEndtime = "PIO_NONE"
+
+ /* test 1 */
val algoid = 12
val modelSet = false
val itypesT1T2 = List("t1", "t2")
val itypesAll = List("t1", "t2", "t3", "t4")
- val items = List(("i0", "t1,t2,t3", "19", "123456"), ("i1", "t2,t3", "19", "123457"), ("i2", "t4", "19", "21"), ("i3", "t3,t4", "19", "9876543210"))
+ val items = List(
+ ("i0", "t1,t2,t3", "19", "123456", "345678", noEndtime),
+ ("i1", "t2,t3", "19", "123457", "567890", noEndtime),
+ ("i2", "t4", "19", "21", "88", noEndtime),
+ ("i3", "t3,t4", "19", "9876543210", "67890", noEndtime))
val itemSimScoresT1T2 = List(
("i3", "i1,i0", "123457.0,123456.0", "[t2,t3],[t1,t2,t3]", algoid, modelSet),
("i2", "i1,i0", "123457.0,123456.0", "[t2,t3],[t1,t2,t3]", algoid, modelSet),
@@ -82,14 +92,96 @@
("i0", "i3,i1", "9876543210.0,123457.0", "[t3,t4],[t2,t3]", algoid, modelSet))
"latestrank.LatestRank with some itypes and numSimilarItems larger than number of items" should {
- test(algoid, modelSet, itypesT1T2, 500, items, itemSimScoresT1T2)
+ test(algoid, modelSet, itypesT1T2, 500, largeNumber, items, itemSimScoresT1T2)
}
"latestrank.LatestRank with all itypes and numSimilarItems larger than number of items" should {
- test(algoid, modelSet, itypesAll, 500, items, itemSimScoresAll)
+ test(algoid, modelSet, itypesAll, 500, largeNumber, items, itemSimScoresAll)
}
"latestrank.LatestRank with all itypes numSimilarItems smaller than number of items" should {
- test(algoid, modelSet, itypesAll, 2, items, itemSimScoresAllTop2)
+ test(algoid, modelSet, itypesAll, 2, largeNumber, items, itemSimScoresAllTop2)
}
+
+ /* test 2: test starttime and endtime */
+ // starttime, endtime
+ // i0 A |---------|
+ // i1 B |---------|E
+ // i2 C|---------|
+ // i3 |---------|
+ // D F G
+
+ val tA = 123122
+ val tB = 123123
+ val tC = 123457
+ val tD = 123679
+ val tE = 543322
+ val tF = 543654
+ val tG = 543655
+
+ val test2Algoid = 12
+ val test2ModelSet = false
+
+ val test2ItypesAll = List("t1", "t2", "t3", "t4")
+ val test2Items = List(
+ ("i0", "t1,t2,t3", "19", "123123", "4", "543210"),
+ ("i1", "t2,t3", "19", "123456", "5", "543321"),
+ ("i2", "t4", "19", "123567", "6", "543432"),
+ ("i3", "t3,t4", "19", "123678", "7", "543654"))
+
+ val test2Users = List(("u0", "3"), ("u1", "3"), ("u2", "3"), ("u3", "3"))
+
+ val test2ItemSimScoresAll = List(
+ ("i0", "i3,i2,i1", "123678.0,123567.0,123456.0", "[t3,t4],[t4],[t2,t3]", test2Algoid, test2ModelSet),
+ ("i1", "i3,i2,i0", "123678.0,123567.0,123123.0", "[t3,t4],[t4],[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("i2", "i3,i1,i0", "123678.0,123456.0,123123.0", "[t3,t4],[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("i3", "i2,i1,i0", "123567.0,123456.0,123123.0", "[t4],[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet))
+
+ val test2ItemSimScoresEmpty = List()
+
+ val test2ItemSimScoresi0 = List(
+ ("i1", "i0", "123123.0", "[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("i2", "i0", "123123.0", "[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("i3", "i0", "123123.0", "[t1,t2,t3]", test2Algoid, test2ModelSet))
+
+ val test2ItemSimScoresi0i1 = List(
+ ("i0", "i1", "123456.0", "[t2,t3]", test2Algoid, test2ModelSet),
+ ("i1", "i0", "123123.0", "[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("i2", "i1,i0", "123456.0,123123.0", "[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("i3", "i1,i0", "123456.0,123123.0", "[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet))
+
+ val test2ItemSimScoresi2i3 = List(
+ ("i0", "i3,i2", "123678.0,123567.0", "[t3,t4],[t4]", test2Algoid, test2ModelSet),
+ ("i1", "i3,i2", "123678.0,123567.0", "[t3,t4],[t4]", test2Algoid, test2ModelSet),
+ ("i2", "i3", "123678.0", "[t3,t4]", test2Algoid, test2ModelSet),
+ ("i3", "i2", "123567.0", "[t4]", test2Algoid, test2ModelSet))
+
+ "recommendationTime < all item starttime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tA, test2Items, test2ItemSimScoresEmpty)
+ }
+
+ "recommendationTime == earliest starttime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tB, test2Items, test2ItemSimScoresi0)
+ }
+
+ "recommendationTime > some items starttime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tC, test2Items, test2ItemSimScoresi0i1)
+ }
+
+ "recommendationTime > all item starttime and < all item endtime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tD, test2Items, test2ItemSimScoresAll)
+ }
+
+ "recommendationTime > some item endtime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tE, test2Items, test2ItemSimScoresi2i3)
+ }
+
+ "recommendationTime == last item endtime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tA, test2Items, test2ItemSimScoresEmpty)
+ }
+
+ "recommendationTime > last item endtime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tA, test2Items, test2ItemSimScoresEmpty)
+ }
+
}
diff --git a/process/engines/itemsim/algorithms/hadoop/scalding/mahout/src/main/scala/io/prediction/algorithms/scalding/mahout/itemsim/DataPreparator.scala b/process/engines/itemsim/algorithms/hadoop/scalding/mahout/src/main/scala/io/prediction/algorithms/scalding/mahout/itemsim/DataPreparator.scala
index dd1bce5..0e1289a 100644
--- a/process/engines/itemsim/algorithms/hadoop/scalding/mahout/src/main/scala/io/prediction/algorithms/scalding/mahout/itemsim/DataPreparator.scala
+++ b/process/engines/itemsim/algorithms/hadoop/scalding/mahout/src/main/scala/io/prediction/algorithms/scalding/mahout/itemsim/DataPreparator.scala
@@ -60,7 +60,7 @@
val preItypesArg = args.list("itypes")
val itypesArg: Option[List[String]] = if (preItypesArg.mkString(",").length == 0) None else Option(preItypesArg)
- // determin how to map actions to rating values
+ // determine how to map actions to rating values
def getActionParam(name: String): Option[Int] = {
val actionParam: Option[Int] = args(name) match {
case "ignore" => None
@@ -100,7 +100,7 @@
*/
val items = Items(appId=trainingAppid, itypes=itypesArg,
- dbType=dbTypeArg, dbName=dbNameArg, dbHost=dbHostArg, dbPort=dbPortArg).readData('iidx, 'itypes)
+ dbType=dbTypeArg, dbName=dbNameArg, dbHost=dbHostArg, dbPort=dbPortArg).readStartEndtime('iidx, 'itypes, 'starttime, 'endtime)
val users = Users(appId=trainingAppid,
dbType=dbTypeArg, dbName=dbNameArg, dbHost=dbHostArg, dbPort=dbPortArg).readData('uid)
@@ -118,11 +118,13 @@
users.write(userIdSink)
- items.mapTo(('iidx, 'itypes) -> ('iidx, 'itypes)) { fields: (String, List[String]) =>
- val (iidx, itypes) = fields
+ items.mapTo(('iidx, 'itypes, 'starttime, 'endtime) -> ('iidx, 'itypes, 'starttime, 'endtime)) { fields: (String, List[String], Long, Option[Long]) =>
+ val (iidx, itypes, starttime, endtime) = fields
- (iidx, itypes.mkString(",")) // NOTE: convert List[String] into comma-separated String
- }.write(selectedItemSink)
+ // NOTE: convert List[String] into comma-separated String
+ // NOTE: endtime is optional
+ (iidx, itypes.mkString(","), starttime, endtime.map(_.toString).getOrElse("PIO_NONE"))
+ }.write(selectedItemSink)
}
@@ -147,21 +149,21 @@
// use byte offset as index for Mahout algo
val itemsIndex = TextLine(DataFile(hdfsRootArg, appidArg, engineidArg, algoidArg, evalidArg, "selectedItems.tsv")).read
- .mapTo(('offset, 'line) -> ('iindex, 'iidx, 'itypes)) { fields: (String, String) =>
+ .mapTo(('offset, 'line) -> ('iindex, 'iidx, 'itypes, 'starttime, 'endtime)) { fields: (String, String) =>
val (offset, line) = fields
val lineArray = line.split("\t")
- val (iidx, itypes) = try {
- (lineArray(0), lineArray(1))
+ val (iidx, itypes, starttime, endtime) = try {
+ (lineArray(0), lineArray(1), lineArray(2), lineArray(3))
} catch {
case e: Exception => {
assert(false, "Failed to extract iidx and itypes from the line: " + line + ". Exception: " + e)
- (0, "dummy")
+ (0, "dummy", "dummy", "dummy")
}
}
- (offset, iidx, itypes)
+ (offset, iidx, itypes, starttime, endtime)
}
val usersIndex = TextLine(DataFile(hdfsRootArg, appidArg, engineidArg, algoidArg, evalidArg, "userIds.tsv")).read
@@ -187,7 +189,7 @@
// filter and pre-process actions
u2i.joinWithSmaller('iid -> 'iidx, itemsIndex) // only select actions of these items
- .filter('action, 'v) { fields: (String, String) =>
+ .filter('action, 'v) { fields: (String, Option[String]) =>
val (action, v) = fields
val keepThis: Boolean = action match {
@@ -203,12 +205,19 @@
}
keepThis
}
- .map(('action, 'v, 't) -> ('rating, 'tLong)) { fields: (String, String, String) =>
+ .map(('action, 'v, 't) -> ('rating, 'tLong)) { fields: (String, Option[String], String) =>
val (action, v, t) = fields
// convert actions into rating value based on "action" and "v" fields
val rating: Int = action match {
- case ACTION_RATE => v.toInt
+ case ACTION_RATE => try {
+ v.get.toInt
+ } catch {
+ case e: Exception => {
+ assert(false, s"Failed to convert v field ${v} to integer for ${action} action. Exception:" + e)
+ 1
+ }
+ }
case ACTION_LIKE => likeParamArg.getOrElse{
assert(false, "Action type " + action + " should have been filtered out!")
1
diff --git a/process/engines/itemsim/algorithms/hadoop/scalding/mahout/src/main/scala/io/prediction/algorithms/scalding/mahout/itemsim/ModelConstructor.scala b/process/engines/itemsim/algorithms/hadoop/scalding/mahout/src/main/scala/io/prediction/algorithms/scalding/mahout/itemsim/ModelConstructor.scala
index 86bbaee..16bb57e 100644
--- a/process/engines/itemsim/algorithms/hadoop/scalding/mahout/src/main/scala/io/prediction/algorithms/scalding/mahout/itemsim/ModelConstructor.scala
+++ b/process/engines/itemsim/algorithms/hadoop/scalding/mahout/src/main/scala/io/prediction/algorithms/scalding/mahout/itemsim/ModelConstructor.scala
@@ -24,6 +24,7 @@
* --modelSet: <boolean> (true/false). flag to indicate which set
*
* --numSimilarItems: <int>. number of similar items to be generated
+ * --recommendationTime: <long> (eg. 9876543210). recommend items with starttime <= recommendationTime and endtime > recommendationTime
*
 * Optional args:
* --dbHost: <string> (eg. "127.0.0.1")
@@ -59,17 +60,35 @@
val modelSetArg = args("modelSet").toBoolean
val numSimilarItems = args("numSimilarItems").toInt
+ val recommendationTimeArg = args("recommendationTime").toLong
/**
* source
*/
val similarities = Tsv(AlgoFile(hdfsRootArg, appidArg, engineidArg, algoidArg, evalidArg, "similarities.tsv"), ('iindex, 'simiindex, 'score)).read
+ .mapTo(('iindex, 'simiindex, 'score) -> ('iindex, 'simiindex, 'score)) {
+ fields: (String, String, Double) => fields // convert score from String to Double
+ }
val itemsIndex = Tsv(DataFile(hdfsRootArg, appidArg, engineidArg, algoidArg, evalidArg, "itemsIndex.tsv")).read
- .mapTo((0, 1, 2) -> ('iindexI, 'iidI, 'itypesI)) { fields: (String, String, String) =>
- val (iindex, iid, itypes) = fields // itypes are comma-separated String
-
- (iindex, iid, itypes.split(",").toList)
+ .mapTo((0, 1, 2, 3, 4) -> ('iindexI, 'iidI, 'itypesI, 'starttimeI, 'endtimeI)) { fields: (String, String, String, Long, String) =>
+ val (iindex, iid, itypes, starttime, endtime) = fields // itypes are comma-separated String
+
+ val endtimeOpt: Option[Long] = endtime match {
+ case "PIO_NONE" => None
+ case x: String => {
+ try {
+ Some(x.toLong)
+ } catch {
+ case e: Exception => {
+ assert(false, s"Failed to convert ${x} to Long. Exception: " + e)
+ Some(0)
+ }
+ }
+ }
+ }
+
+ (iindex, iid, itypes.split(",").toList, starttime, endtimeOpt)
}
/**
@@ -83,16 +102,37 @@
*/
val sim = similarities.joinWithSmaller('iindex -> 'iindexI, itemsIndex)
.discard('iindex, 'iindexI)
- .rename(('iidI, 'itypesI) -> ('iid, 'itypes))
+ .rename(('iidI, 'itypesI, 'starttimeI, 'endtimeI) -> ('iid, 'itypes, 'starttime, 'endtime))
.joinWithSmaller('simiindex -> 'iindexI, itemsIndex)
- val sim1 = sim.project('iid, 'iidI, 'itypesI, 'score)
- val sim2 = sim.mapTo(('iidI, 'iid, 'itypes, 'score) -> ('iid, 'iidI, 'itypesI, 'score)) { fields: (String, String, List[String], String) => fields }
+ // NOTE: use simiid's starttime and endtime. not iid's.
+ val sim1 = sim.project('iid, 'iidI, 'itypesI, 'score, 'starttimeI, 'endtimeI)
+ // NOTE: mahout only calculates half of the sim matrix; reverse the fields to get the other half
+ val sim2 = sim.mapTo(('iidI, 'iid, 'itypes, 'score, 'starttime, 'endtime) -> ('iid, 'iidI, 'itypesI, 'score, 'starttimeI, 'endtimeI)) {
+ fields: (String, String, List[String], Double, Long, Option[Long]) => fields }
val combinedSimilarities = sim1 ++ sim2
combinedSimilarities
+ .filter('starttimeI, 'endtimeI) { fields: (Long, Option[Long]) =>
+ val (starttimeI, endtimeI) = fields
+
+ val keepThis: Boolean = (starttimeI, endtimeI) match {
+ case (start, None) => (recommendationTimeArg >= start)
+ case (start, Some(end)) => ((recommendationTimeArg >= start) && (recommendationTimeArg < end))
+ case _ => {
+ assert(false, s"Unexpected item starttime ${starttimeI} and endtime ${endtimeI}")
+ false
+ }
+ }
+ keepThis
+ }
.groupBy('iid) { _.sortBy('score).reverse.toList[(String, Double, List[String])](('iidI, 'score, 'itypesI) -> 'simiidsList) }
+ .mapTo(('iid, 'simiidsList) -> ('iid, 'simiidsList)) { fields: (String, List[(String, Double, List[String])]) =>
+ val (iid, simiidsList) = fields
+
+ (iid, simiidsList.take(numSimilarItems))
+ }
.then ( ItemSimScoresSink.writeData('iid, 'simiidsList, algoidArg, modelSetArg) _ )
}
diff --git a/process/engines/itemsim/algorithms/hadoop/scalding/mahout/src/test/scala/io/prediction/algorithms/scalding/mahout/itemsim/DataPreparatorTest.scala b/process/engines/itemsim/algorithms/hadoop/scalding/mahout/src/test/scala/io/prediction/algorithms/scalding/mahout/itemsim/DataPreparatorTest.scala
index 1e27753..22ff23e 100644
--- a/process/engines/itemsim/algorithms/hadoop/scalding/mahout/src/test/scala/io/prediction/algorithms/scalding/mahout/itemsim/DataPreparatorTest.scala
+++ b/process/engines/itemsim/algorithms/hadoop/scalding/mahout/src/test/scala/io/prediction/algorithms/scalding/mahout/itemsim/DataPreparatorTest.scala
@@ -16,20 +16,22 @@
//val ViewDetails = "viewDetails"
val Conversion = "conversion"
+ val appid = 2
+
def test(itypes: List[String], params: Map[String, String],
- items: List[(String, String)],
+ items: List[(String, String, String, String, String, String)], // id, itypes, appid, starttime, ct, endtime
users: List[Tuple1[String]],
u2iActions: List[(String, String, String, String, String)],
ratings: List[(String, String, String)],
- selectedItems: List[(String, String)],
+ selectedItems: List[(String, String, String, String)], // id, itypes, starttime, endtime
itemsIndexer: Map[String, String],
usersIndexer: Map[String, String]) = {
val userIds = users map (x => x._1)
- val selectedItemsTextLine = selectedItems map {x => (itemsIndexer(x._1), x._1 + "\t" + x._2)}
+ val selectedItemsTextLine = selectedItems map { x => (itemsIndexer(x._1), x.productIterator.mkString("\t")) }
val usersTextLine = users map {x => (usersIndexer(x._1), x._1) }
- val itemsIndex = selectedItems map {x => (itemsIndexer(x._1), x._1, x._2)}
+ val itemsIndex = selectedItems map { x => (itemsIndexer(x._1), x._1, x._2, x._3, x._4) }
val usersIndex = users map {x => (usersIndexer(x._1), x._1) }
val ratingsIndexed = ratings map {x => (usersIndexer(x._1), itemsIndexer(x._2), x._3)}
@@ -39,8 +41,7 @@
val dbHost = None
val dbPort = None
val hdfsRoot = "testroot/"
-
- val appid = 2
+
val engineid = 4
val algoid = 5
val evalid = None
@@ -65,7 +66,7 @@
outputBuffer.toList must containTheSameElementsAs(userIds)
}
}
- .sink[(String, String)](Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "selectedItems.tsv"))) { outputBuffer =>
+ .sink[(String, String, String, String)](Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "selectedItems.tsv"))) { outputBuffer =>
"correctly write selectedItems.tsv" in {
outputBuffer.toList must containTheSameElementsAs(selectedItems)
}
@@ -90,7 +91,7 @@
.source(U2iActions(appId=appid, dbType=dbType, dbName=dbName, dbHost=dbHost, dbPort=dbPort).getSource, u2iActions)
.source(TextLine(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "selectedItems.tsv")), selectedItemsTextLine)
.source(TextLine(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "userIds.tsv")), usersTextLine)
- .sink[(String, String, String)](Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "itemsIndex.tsv"))) { outputBuffer =>
+ .sink[(String, String, String, String, String)](Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "itemsIndex.tsv"))) { outputBuffer =>
// index, iid, itypes
"correctly write itemsIndex.tsv" in {
outputBuffer.toList must containTheSameElementsAs(itemsIndex)
@@ -112,11 +113,31 @@
}
+ val noEndtime = "PIO_NONE"
/**
* Test 1. basic. Rate actions only without conflicts
*/
val test1AllItypes = List("t1", "t2", "t3", "t4")
- val test1Items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test1ItemsMap = Map(
+ // id, itypes, appid, starttime, ct, endtime
+ "i0" -> ("i0", "t1,t2,t3", appid.toString, "12345", "12346", noEndtime),
+ "i1" -> ("i1", "t2,t3", appid.toString, "12347", "12348", noEndtime),
+ "i2" -> ("i2", "t4", appid.toString, "12349", "12350", noEndtime),
+ "i3" -> ("i3", "t3,t4", appid.toString, "12351", "12352", noEndtime))
+
+ val test1Items = List(
+ test1ItemsMap("i0"),
+ test1ItemsMap("i1"),
+ test1ItemsMap("i2"),
+ test1ItemsMap("i3"))
+
+ def genSelectedItems(items: List[(String, String, String, String, String, String)]) = {
+ items map { x =>
+ val (id, itypes, appid, starttime, ct, endtime) = x
+ (id, itypes, starttime, endtime)
+ }
+ }
+
val test1ItemsIndexer = Map("i0" -> "0", "i1" -> "4", "i2" -> "7", "i3" -> "8") // map iid to index
val test1Users = List(Tuple1("u0"), Tuple1("u1"), Tuple1("u2"), Tuple1("u3"))
@@ -142,18 +163,30 @@
"conflictParam" -> "latest")
"DataPreparator with only rate actions, all itypes, no conflict" should {
- test(test1AllItypes, test1Params, test1Items, test1Users, test1U2i, test1Ratings, test1Items, test1ItemsIndexer, test1UsersIndexer)
+ test(test1AllItypes, test1Params, test1Items, test1Users, test1U2i, test1Ratings, genSelectedItems(test1Items), test1ItemsIndexer, test1UsersIndexer)
}
"DataPreparator with only rate actions, no itypes specified, no conflict" should {
- test(List(), test1Params, test1Items, test1Users, test1U2i, test1Ratings, test1Items, test1ItemsIndexer, test1UsersIndexer)
+ test(List(), test1Params, test1Items, test1Users, test1U2i, test1Ratings, genSelectedItems(test1Items), test1ItemsIndexer, test1UsersIndexer)
}
/**
* Test 2. rate actions only with conflicts
*/
val test2AllItypes = List("t1", "t2", "t3", "t4")
- val test2Items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test2ItemsMap = Map(
+ // id, itypes, appid, starttime, ct, endtime
+ "i0" -> ("i0", "t1,t2,t3", appid.toString, "12345", "12346", noEndtime),
+ "i1" -> ("i1", "t2,t3", appid.toString, "12347", "12348", noEndtime),
+ "i2" -> ("i2", "t4", appid.toString, "12349", "12350", noEndtime),
+ "i3" -> ("i3", "t3,t4", appid.toString, "12351", "12352", noEndtime))
+
+ val test2Items = List(
+ test2ItemsMap("i0"),
+ test2ItemsMap("i1"),
+ test2ItemsMap("i2"),
+ test2ItemsMap("i3"))
+
val test2ItemsIndexer = Map("i0" -> "0", "i1" -> "4", "i2" -> "7", "i3" -> "8") // map iid to index
val test2Users = List(Tuple1("u0"), Tuple1("u1"), Tuple1("u2"), Tuple1("u3"))
@@ -205,7 +238,11 @@
("u1", "i1", "3"))
val test2Itypes_t1t4 = List("t1", "t4")
- val test2Items_t1t4 = List(("i0", "t1,t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test2Items_t1t4 = List(
+ test2ItemsMap("i0"),
+ test2ItemsMap("i2"),
+ test2ItemsMap("i3"))
+
val test2RatingsHighest_t1t4 = List(
("u0", "i0", "4"),
("u0", "i2", "5"),
@@ -218,26 +255,37 @@
val test2ParamsLowest = test2Params + ("conflictParam" -> "lowest")
"DataPreparator with only rate actions, all itypes, conflict=latest" should {
- test(test2AllItypes, test2Params, test2Items, test2Users, test2U2i, test2RatingsLatest, test2Items, test2ItemsIndexer, test2UsersIndexer)
+ test(test2AllItypes, test2Params, test2Items, test2Users, test2U2i, test2RatingsLatest, genSelectedItems(test2Items), test2ItemsIndexer, test2UsersIndexer)
}
"DataPreparator with only rate actions, all itypes, conflict=highest" should {
- test(test2AllItypes, test2ParamsHighest, test2Items, test2Users, test2U2i, test2RatingsHighest, test2Items, test2ItemsIndexer, test2UsersIndexer)
+ test(test2AllItypes, test2ParamsHighest, test2Items, test2Users, test2U2i, test2RatingsHighest, genSelectedItems(test2Items), test2ItemsIndexer, test2UsersIndexer)
}
"DataPreparator with only rate actions, all itypes, conflict=lowest" should {
- test(test2AllItypes, test2ParamsLowest, test2Items, test2Users, test2U2i, test2RatingsLowest, test2Items, test2ItemsIndexer, test2UsersIndexer)
+ test(test2AllItypes, test2ParamsLowest, test2Items, test2Users, test2U2i, test2RatingsLowest, genSelectedItems(test2Items), test2ItemsIndexer, test2UsersIndexer)
}
"DataPreparator with only rate actions, some itypes, conflict=highest" should {
- test(test2Itypes_t1t4, test2ParamsHighest, test2Items, test2Users, test2U2i, test2RatingsHighest_t1t4, test2Items_t1t4, test2ItemsIndexer, test2UsersIndexer)
+ test(test2Itypes_t1t4, test2ParamsHighest, test2Items, test2Users, test2U2i, test2RatingsHighest_t1t4, genSelectedItems(test2Items_t1t4), test2ItemsIndexer, test2UsersIndexer)
}
/**
* Test 3. Different Actions without conflicts
*/
val test3AllItypes = List("t1", "t2", "t3", "t4")
- val test3Items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test3ItemsMap = Map(
+ // id, itypes, appid, starttime, ct, endtime
+ "i0" -> ("i0", "t1,t2,t3", appid.toString, "12345", "12346", "56789"),
+ "i1" -> ("i1", "t2,t3", appid.toString, "12347", "12348", noEndtime),
+ "i2" -> ("i2", "t4", appid.toString, "12349", "12350", "56790"),
+ "i3" -> ("i3", "t3,t4", appid.toString, "12351", "12352", noEndtime))
+
+ val test3Items = List(
+ test3ItemsMap("i0"),
+ test3ItemsMap("i1"),
+ test3ItemsMap("i2"),
+ test3ItemsMap("i3"))
val test3ItemsIndexer = Map("i0" -> "0", "i1" -> "4", "i2" -> "7", "i3" -> "8") // map iid to index
@@ -246,11 +294,11 @@
val test3U2i = List(
(Rate, "u0", "i0", "123450", "4"),
- (Like, "u0", "i1", "123457", "3"),
- (Dislike, "u0", "i2", "123458", "3"),
- (View, "u0", "i3", "123459", "0"), // NOTE: assume v field won't be missing
+ (Like, "u0", "i1", "123457", "PIO_NONE"),
+ (Dislike, "u0", "i2", "123458", "PIO_NONE"),
+ (View, "u0", "i3", "123459", "PIO_NONE"), // NOTE: assume v field won't be missing
(Rate, "u1", "i0", "123457", "2"),
- (Conversion, "u1", "i1", "123458", "0"))
+ (Conversion, "u1", "i1", "123458", "PIO_NONE"))
val test3Ratings = List(
("u0", "i0", "4"),
@@ -264,7 +312,7 @@
"conflictParam" -> "latest")
"DataPreparator with only all actions, all itypes, no conflict" should {
- test(test3AllItypes, test3Params, test3Items, test3Users, test3U2i, test3Ratings, test3Items, test3ItemsIndexer, test3UsersIndexer)
+ test(test3AllItypes, test3Params, test3Items, test3Users, test3U2i, test3Ratings, genSelectedItems(test3Items), test3ItemsIndexer, test3UsersIndexer)
}
/**
@@ -274,7 +322,18 @@
"conflictParam" -> "latest")
val test4AllItypes = List("t1", "t2", "t3", "t4")
- val test4Items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val test4ItemsMap = Map(
+ // id, itypes, appid, starttime, ct, endtime
+ "i0" -> ("i0", "t1,t2,t3", appid.toString, "12345", "12346", "56789"),
+ "i1" -> ("i1", "t2,t3", appid.toString, "12347", "12348", noEndtime),
+ "i2" -> ("i2", "t4", appid.toString, "12349", "12350", "56790"),
+ "i3" -> ("i3", "t3,t4", appid.toString, "12351", "12352", noEndtime))
+
+ val test4Items = List(
+ test4ItemsMap("i0"),
+ test4ItemsMap("i1"),
+ test4ItemsMap("i2"),
+ test4ItemsMap("i3"))
val test4ItemsIndexer = Map("i0" -> "0", "i1" -> "4", "i2" -> "7", "i3" -> "8") // map iid to index
@@ -283,24 +342,24 @@
val test4U2i = List(
(Rate, "u0", "i0", "123448", "3"),
- (View, "u0", "i0", "123449", "4"), // lowest (2)
- (Like, "u0", "i0", "123451", "0"), // latest, highest (5)
- (Conversion, "u0", "i0", "123450", "1"),
+ (View, "u0", "i0", "123449", "PIO_NONE"), // lowest (2)
+ (Like, "u0", "i0", "123451", "PIO_NONE"), // latest, highest (5)
+ (Conversion, "u0", "i0", "123450", "PIO_NONE"),
(Rate, "u0", "i1", "123456", "1"), // lowest
(Rate, "u0", "i1", "123457", "4"), // highest
- (View, "u0", "i1", "123458", "3"), // latest (2)
+ (View, "u0", "i1", "123458", "PIO_NONE"), // latest (2)
- (Conversion, "u0", "i2", "123461", "2"), // latest, highest (4)
+ (Conversion, "u0", "i2", "123461", "PIO_NONE"), // latest, highest (4)
(Rate, "u0", "i2", "123459", "3"),
- (View, "u0", "i2", "123460", "5"), // lowest
+ (View, "u0", "i2", "123460", "PIO_NONE"), // lowest
(Rate, "u0", "i3", "123459", "2"),
- (View, "u1", "i0", "123457", "5"), // (2)
+ (View, "u1", "i0", "123457", "PIO_NONE"), // (2)
(Rate, "u1", "i1", "123458", "5"), // highest
- (Conversion, "u1", "i1", "123459", "4"), // (4)
- (Dislike, "u1", "i1", "123460", "1")) // latest, lowest (1)
+ (Conversion, "u1", "i1", "123459", "PIO_NONE"), // (4)
+ (Dislike, "u1", "i1", "123460", "PIO_NONE")) // latest, lowest (1)
val test4RatingsLatest = List(
("u0", "i0", "5"),
@@ -311,7 +370,7 @@
("u1", "i1", "1"))
"DataPreparator with all actions, all itypes, and conflicts=latest" should {
- test(test4AllItypes, test4Params, test4Items, test4Users, test4U2i, test4RatingsLatest, test4Items, test4ItemsIndexer, test4UsersIndexer)
+ test(test4AllItypes, test4Params, test4Items, test4Users, test4U2i, test4RatingsLatest, genSelectedItems(test4Items), test4ItemsIndexer, test4UsersIndexer)
}
val test4ParamsIgnoreView = test4Params + ("viewParam" -> "ignore")
@@ -324,7 +383,7 @@
("u1", "i1", "1"))
"DataPreparator with all actions, all itypes, ignore View actions and conflicts=latest" should {
- test(test4AllItypes, test4ParamsIgnoreView, test4Items, test4Users, test4U2i, test4RatingsIgnoreViewLatest, test4Items, test4ItemsIndexer, test4UsersIndexer)
+ test(test4AllItypes, test4ParamsIgnoreView, test4Items, test4Users, test4U2i, test4RatingsIgnoreViewLatest, genSelectedItems(test4Items), test4ItemsIndexer, test4UsersIndexer)
}
// note: currently rate action can't be ignored
@@ -339,7 +398,7 @@
("u1", "i1", "5"))
"DataPreparator with all actions, all itypes, ignore all actions except View (and Rate) and conflicts=latest" should {
- test(test4AllItypes, test4ParamsIgnoreAllExceptView, test4Items, test4Users, test4U2i, test4RatingsIgnoreAllExceptViewLatest, test4Items, test4ItemsIndexer, test4UsersIndexer)
+ test(test4AllItypes, test4ParamsIgnoreAllExceptView, test4Items, test4Users, test4U2i, test4RatingsIgnoreAllExceptViewLatest, genSelectedItems(test4Items), test4ItemsIndexer, test4UsersIndexer)
}
// note: meaning rate action only
@@ -353,13 +412,17 @@
("u1", "i1", "5"))
"DataPreparator with all actions, all itypes, ignore all actions (except Rate) and conflicts=latest" should {
- test(test4AllItypes, test4ParamsIgnoreAll, test4Items, test4Users, test4U2i, test4RatingsIgnoreAllLatest, test4Items, test4ItemsIndexer, test4UsersIndexer)
+ test(test4AllItypes, test4ParamsIgnoreAll, test4Items, test4Users, test4U2i, test4RatingsIgnoreAllLatest, genSelectedItems(test4Items), test4ItemsIndexer, test4UsersIndexer)
}
val test4ParamsLowest: Map[String, String] = test4Params + ("conflictParam" -> "lowest")
val test4Itypes_t3 = List("t3")
- val test4Items_t3 = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i3", "t3,t4"))
+ val test4Items_t3 = List(
+ test4ItemsMap("i0"),
+ test4ItemsMap("i1"),
+ test4ItemsMap("i3"))
+
val test4RatingsLowest_t3 = List(
("u0", "i0", "2"),
("u0", "i1", "1"),
@@ -368,7 +431,7 @@
("u1", "i1", "1"))
"DataPreparator with only all actions, some itypes, and conflicts=lowest" should {
- test(test4Itypes_t3, test4ParamsLowest, test4Items, test4Users, test4U2i, test4RatingsLowest_t3, test4Items_t3, test4ItemsIndexer, test4UsersIndexer)
+ test(test4Itypes_t3, test4ParamsLowest, test4Items, test4Users, test4U2i, test4RatingsLowest_t3, genSelectedItems(test4Items_t3), test4ItemsIndexer, test4UsersIndexer)
}
diff --git a/process/engines/itemsim/algorithms/hadoop/scalding/mahout/src/test/scala/io/prediction/algorithms/scalding/mahout/itemsim/ModelConstructorTest.scala b/process/engines/itemsim/algorithms/hadoop/scalding/mahout/src/test/scala/io/prediction/algorithms/scalding/mahout/itemsim/ModelConstructorTest.scala
index 135b531..73e4790 100644
--- a/process/engines/itemsim/algorithms/hadoop/scalding/mahout/src/test/scala/io/prediction/algorithms/scalding/mahout/itemsim/ModelConstructorTest.scala
+++ b/process/engines/itemsim/algorithms/hadoop/scalding/mahout/src/test/scala/io/prediction/algorithms/scalding/mahout/itemsim/ModelConstructorTest.scala
@@ -10,12 +10,13 @@
class ModelConstructorTest extends Specification with TupleConversions {
- def test(numSimilarItems: Int,
- items: List[(String, String, String)],
+ val appid = 3
+
+ def test(numSimilarItems: Int, recommendationTime: Long,
+ items: List[(String, String, String, String, String)], //(iindex, iid, itypes, starttime, endtime)
similarities: List[(String, String, String)],
output: List[(String, String, String, String)]) = {
- val appid = 3
val engineid = 4
val algoid = 7
val evalid = None
@@ -38,6 +39,7 @@
.arg("algoid", algoid.toString)
.arg("modelSet", modelSet.toString)
.arg("numSimilarItems", numSimilarItems.toString)
+ .arg("recommendationTime", recommendationTime.toString)
.source(Tsv(AlgoFile(hdfsRoot, appid, engineid, algoid, evalid, "similarities.tsv"), new Fields("iindex", "simiindex", "score")), similarities)
.source(Tsv(DataFile(hdfsRoot, appid, engineid, algoid, evalid, "itemsIndex.tsv")), items)
.sink[(String, String, String, String, Int, Boolean)](ItemSimScores(dbType=dbType, dbName=dbName, dbHost=dbHost, dbPort=dbPort, algoid=algoid, modelset=modelSet).getSource) { outputBuffer =>
@@ -50,7 +52,14 @@
}
- val test1Items = List(("0", "i0", "t1,t2,t3"), ("1", "i1", "t1,t2"), ("2", "i2", "t2,t3"), ("3", "i3", "t2"))
+ val noEndtime = "PIO_NONE"
+
+ /* test 1*/
+ val test1Items = List(
+ ("0", "i0", "t1,t2,t3", "12346", noEndtime),
+ ("1", "i1", "t1,t2", "12347", noEndtime),
+ ("2", "i2", "t2,t3", "12348", noEndtime),
+ ("3", "i3", "t2", "12349", noEndtime))
val test1Similarities = List(
("0", "1", "0.83"),
@@ -66,10 +75,146 @@
("i2", "i1,i3,i0", "0.51,0.32,0.25", "[t1,t2],[t2],[t1,t2,t3]"),
("i3", "i1,i0,i2", "0.68,0.49,0.32", "[t1,t2],[t1,t2,t3],[t2,t3]"))
+ val test1Output1 = List(
+ ("i0", "i1", "0.83", "[t1,t2]"),
+ ("i1", "i0", "0.83", "[t1,t2,t3]"),
+ ("i2", "i1", "0.51", "[t1,t2]"),
+ ("i3", "i1", "0.68", "[t1,t2]"))
+
+ val test1Output2 = List(
+ ("i0", "i1,i3", "0.83,0.49", "[t1,t2],[t2]"),
+ ("i1", "i0,i3", "0.83,0.68", "[t1,t2,t3],[t2]"),
+ ("i2", "i1,i3", "0.51,0.32", "[t1,t2],[t2]"),
+ ("i3", "i1,i0", "0.68,0.49", "[t1,t2],[t1,t2,t3]"))
+
"mahout.itemsim ModelConstructor" should {
- test(100, test1Items, test1Similarities, test1Output)
+ test(100, 1234567890, test1Items, test1Similarities, test1Output)
}
+ "mahout.itemsim ModelConstructor with numSimilarItems=1" should {
+
+ test(1, 1234567890, test1Items, test1Similarities, test1Output1)
+
+ }
+
+ "mahout.itemsim ModelConstructor with numSimilarItems=2" should {
+
+ test(2, 1234567890, test1Items, test1Similarities, test1Output2)
+
+ }
+
+ /* test 2: score sorting */
+
+ val test2Items = List(
+ ("0", "i0", "t1,t2,t3", "12346", noEndtime),
+ ("1", "i1", "t1,t2", "12347", noEndtime),
+ ("2", "i2", "t2,t3", "12348", noEndtime),
+ ("3", "i3", "t2", "12349", noEndtime))
+
+ val test2Similarities = List(
+ ("0", "1", "83"),
+ ("0", "2", "200"),
+ ("0", "3", "4"),
+ ("1", "2", "9"),
+ ("1", "3", "68"),
+ ("2", "3", "1000"))
+
+ val test2Output = List(
+ ("i0", "i2,i1,i3", "200.0,83.0,4.0", "[t2,t3],[t1,t2],[t2]"),
+ ("i1", "i0,i3,i2", "83.0,68.0,9.0", "[t1,t2,t3],[t2],[t2,t3]"),
+ ("i2", "i3,i0,i1", "1000.0,200.0,9.0", "[t2],[t1,t2,t3],[t1,t2]"),
+ ("i3", "i2,i1,i0", "1000.0,68.0,4.0", "[t2,t3],[t1,t2],[t1,t2,t3]"))
+
+ "mahout.itemsim ModelConstructor (score should not be compared as string)" should {
+
+ test(100, 1234567890, test2Items, test2Similarities, test2Output)
+
+ }
+
+ /* test3: test starttime and endtime */
+
+ // starttime, endtime
+ // i0 A |---------|
+ // i1 B |---------|E
+ // i2 C|---------|
+ // i3 |---------|
+ // D F G
+
+ val tA = 123122
+ val tB = 123123
+ val tC = 123457
+ val tD = 123679
+ val tE = 543322
+ val tF = 543654
+ val tG = 543655
+
+ val test3Items = List(
+ ("0", "i0", "t1,t2,t3", "123123", "543210"),
+ ("1", "i1", "t1,t2", "123456", "543321"),
+ ("2", "i2", "t2,t3", "123567", "543432"),
+ ("3", "i3", "t2", "123678", "543654"))
+
+ val test3Similarities = List(
+ ("0", "1", "83"),
+ ("0", "2", "200"),
+ ("0", "3", "4"),
+ ("1", "2", "9"),
+ ("1", "3", "68"),
+ ("2", "3", "1000"))
+
+ val test3Output = List(
+ ("i0", "i2,i1,i3", "200.0,83.0,4.0", "[t2,t3],[t1,t2],[t2]"),
+ ("i1", "i0,i3,i2", "83.0,68.0,9.0", "[t1,t2,t3],[t2],[t2,t3]"),
+ ("i2", "i3,i0,i1", "1000.0,200.0,9.0", "[t2],[t1,t2,t3],[t1,t2]"),
+ ("i3", "i2,i1,i0", "1000.0,68.0,4.0", "[t2,t3],[t1,t2],[t1,t2,t3]"))
+
+ val test3OutputEmpty = List()
+
+ val test3Outputi0 = List(
+ ("i1", "i0", "83.0", "[t1,t2,t3]"),
+ ("i2", "i0", "200.0", "[t1,t2,t3]"),
+ ("i3", "i0", "4.0", "[t1,t2,t3]"))
+
+ val test3Outputi0i1 = List(
+ ("i0", "i1", "83.0", "[t1,t2]"),
+ ("i1", "i0", "83.0", "[t1,t2,t3]"),
+ ("i2", "i0,i1", "200.0,9.0", "[t1,t2,t3],[t1,t2]"),
+ ("i3", "i1,i0", "68.0,4.0", "[t1,t2],[t1,t2,t3]"))
+
+ val test3Outputi2i3 = List(
+ ("i0", "i2,i3", "200.0,4.0", "[t2,t3],[t2]"),
+ ("i1", "i3,i2", "68.0,9.0", "[t2],[t2,t3]"),
+ ("i2", "i3", "1000.0", "[t2]"),
+ ("i3", "i2", "1000.0", "[t2,t3]"))
+
+ "numSimilarItems=100 and recommendationTime < all item starttime" should {
+ test(100, tA, test3Items, test3Similarities, test3OutputEmpty)
+ }
+
+ "numSimilarItems=100 and recommendationTime == earliest starttime" should {
+ test(100, tB, test3Items, test3Similarities, test3Outputi0)
+ }
+
+ "numSimilarItems=100 and recommendationTime > some items starttime" should {
+ test(100, tC, test3Items, test3Similarities, test3Outputi0i1)
+ }
+
+ "numSimilarItems=100 and recommendationTime > all item starttime and < all item endtime" should {
+ test(100, tD, test3Items, test3Similarities, test3Output)
+ }
+
+ "numSimilarItems=100 and recommendationTime > some item endtime" should {
+ test(100, tE, test3Items, test3Similarities, test3Outputi2i3)
+ }
+
+ "numSimilarItems=100 and recommendationTime == last item endtime" should {
+ test(100, tF, test3Items, test3Similarities, test3OutputEmpty)
+ }
+
+ "numSimilarItems=100 and recommendationTime > last item endtime" should {
+ test(100, tG, test3Items, test3Similarities, test3OutputEmpty)
+ }
+
}
diff --git a/process/engines/itemsim/algorithms/hadoop/scalding/randomrank/src/main/scala/io/prediction/algorithms/scalding/itemsim/randomrank/RandomRank.scala b/process/engines/itemsim/algorithms/hadoop/scalding/randomrank/src/main/scala/io/prediction/algorithms/scalding/itemsim/randomrank/RandomRank.scala
index aa5c3e2..c43d4df 100644
--- a/process/engines/itemsim/algorithms/hadoop/scalding/randomrank/src/main/scala/io/prediction/algorithms/scalding/itemsim/randomrank/RandomRank.scala
+++ b/process/engines/itemsim/algorithms/hadoop/scalding/randomrank/src/main/scala/io/prediction/algorithms/scalding/itemsim/randomrank/RandomRank.scala
@@ -33,8 +33,8 @@
*
* --itypes: <string separated by white space>. optional. eg "--itypes type1 type2". If no --itypes specified, then ALL itypes will be used.
* --numSimilarItems: <int>. number of similar items to be generated
- *
* --modelSet: <boolean> (true/false). flag to indicate which set
+ * --recommendationTime: <long> (eg. 9876543210). recommend items with starttime <= recommendationTime and endtime > recommendationTime
*
* Example:
* hadoop jar PredictionIO-Process-Hadoop-Scala-assembly-0.1.jar io.prediction.algorithms.scalding.itemsim.randomrank.RandomRank --hdfs --training_dbType mongodb --training_dbName predictionio_appdata --training_dbHost localhost --training_dbPort 27017 --modeldata_dbType mongodb --modeldata_dbName predictionio_modeldata --modeldata_dbHost localhost --modeldata_dbPort 27017 --hdfsRoot predictionio/ --appid 1 --engineid 1 --algoid 18 --modelSet true
@@ -67,6 +67,7 @@
val numSimilarItemsArg = args("numSimilarItems").toInt
val modelSetArg = args("modelSet").toBoolean
+ val recommendationTimeArg = args("recommendationTime").toLong
/**
* source
@@ -83,7 +84,21 @@
dbType=training_dbTypeArg,
dbName=training_dbNameArg,
dbHost=training_dbHostArg,
- dbPort=training_dbPortArg).readData('iidx, 'itypes)
+ dbPort=training_dbPortArg).readStartEndtime('iidx, 'itypes, 'starttime, 'endtime)
+ .filter('starttime, 'endtime) { fields: (Long, Option[Long]) =>
+ // only keep items with valid starttime and endtime
+ val (starttimeI, endtimeI) = fields
+
+ val keepThis: Boolean = (starttimeI, endtimeI) match {
+ case (start, None) => (recommendationTimeArg >= start)
+ case (start, Some(end)) => ((recommendationTimeArg >= start) && (recommendationTimeArg < end))
+ case _ => {
+ assert(false, s"Unexpected item starttime ${starttimeI} and endtime ${endtimeI}")
+ false
+ }
+ }
+ keepThis
+ }
val items = Items(
appId=trainingAppid,
diff --git a/process/engines/itemsim/algorithms/hadoop/scalding/randomrank/src/test/scala/io/prediction/algorithms/scalding/itemsim/randomrank/RandomRankTest.scala b/process/engines/itemsim/algorithms/hadoop/scalding/randomrank/src/test/scala/io/prediction/algorithms/scalding/itemsim/randomrank/RandomRankTest.scala
index b1387cf..92d5e3f 100644
--- a/process/engines/itemsim/algorithms/hadoop/scalding/randomrank/src/test/scala/io/prediction/algorithms/scalding/itemsim/randomrank/RandomRankTest.scala
+++ b/process/engines/itemsim/algorithms/hadoop/scalding/randomrank/src/test/scala/io/prediction/algorithms/scalding/itemsim/randomrank/RandomRankTest.scala
@@ -14,7 +14,8 @@
modelSet: Boolean,
itypes: List[String],
numSimilarItems: Int,
- items: List[(String, String)],
+ recommendationTime: Long,
+ items: List[(String, String, String, String, String, String)], // id, itypes, appid, starttime, ct, endtime
itemSimScores: List[(String, String, String, String, Int, Boolean)]) = {
val training_dbType = "file"
val training_dbName = "testpath/"
@@ -40,6 +41,7 @@
.arg("itypes", itypes)
.arg("numSimilarItems", numSimilarItems.toString)
.arg("modelSet", modelSet.toString)
+ .arg("recommendationTime", recommendationTime.toString)
.source(Items(appId=appid, itypes=Some(itypes), dbType=training_dbType, dbName=training_dbName, dbHost=None, dbPort=None).getSource, items)
.sink[(String, String, String, String, Int, Boolean)](ItemSimScores(dbType=modeldata_dbType, dbName=modeldata_dbName, dbHost=None, dbPort=None, algoid=algoid, modelset=modelSet).getSource) { outputBuffer =>
@@ -91,23 +93,30 @@
}
- "not generate same order of iid for all iid group" in {
+ if (getIids(itemSimScores).flatMap { x => x }.toSet.size > 1) {
+ // only check this if the iids in itemRecScores are more than 1
+ "not generate same order of iid for all uid group" in {
+ if (!(getIids(outputBuffer.toList).toSet.size > 1)) {
+ println(outputBuffer)
+ println(getIids(outputBuffer.toList).toSet)
+ }
+ getIids(outputBuffer.toList).toSet.size must be_>(1)
- getIids(outputBuffer.toList).toSet.size must be_>(1)
-
+ }
}
"itypes order match the iids order" in {
// extract (iid, itypes) from the output
val itypesList = getItypes(outputBuffer.toList)
- val itemsMap = items.toMap
+ val itemsMap = items.map(x =>
+ (x._1, x)).toMap
// use the iid only and contruct the (iid, itypes)
val expected = getIids(outputBuffer.toList).map(x =>
// x is List of iid
// create the List of item types using the iid
- x.map(x => (x, itemsMap(x).split(",").toList))
+ x.map(x => (x, itemsMap(x)._2.split(",").toList))
)
itypesList must be_==(expected)
@@ -118,30 +127,123 @@
.finish
}
+ val largeNumber: Long = scala.Long.MaxValue // larger than any item starttime
+ val noEndtime = "PIO_NONE"
+
"randomrank.RandomRank with selected itypes" should {
val algoid = 12
val modelSet = false
val itypes = List("t1", "t2")
- val items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val items = List(
+ ("i0", "t1,t2,t3", "19", "123456", "345678", noEndtime),
+ ("i1", "t2,t3", "19", "123457", "567890", noEndtime),
+ ("i2", "t4", "19", "21", "88", noEndtime),
+ ("i3", "t3,t4", "19", "9876543210", "67890", noEndtime))
val itemSimScores = List(
("i1", "i0", "0.0", "[t1,t2,t3]", algoid, modelSet),
("i0", "i1", "0.0", "[t2,t3]", algoid, modelSet))
- test(algoid, modelSet, itypes, 500, items, itemSimScores)
+ test(algoid, modelSet, itypes, 500, largeNumber, items, itemSimScores)
}
"randomrank.RandomRank with all itypes" should {
val algoid = 12
val modelSet = false
val itypes = List("")
- val items = List(("i0", "t1,t2,t3"), ("i1", "t2,t3"), ("i2", "t4"), ("i3", "t3,t4"))
+ val items = List(
+ ("i0", "t1,t2,t3", "19", "123456", "345678", noEndtime),
+ ("i1", "t2,t3", "19", "123457", "567890", noEndtime),
+ ("i2", "t4", "19", "21", "88", noEndtime),
+ ("i3", "t3,t4", "19", "9876543210", "67890", noEndtime))
val itemSimScores = List(
("i3", "i0,i1,i2", "0.0,0.0,0.0", "[t1,t2,t3],[t2,t3],[t4]", algoid, modelSet),
("i2", "i0,i1,i3", "0.0,0.0,0.0", "[t1,t2,t3],[t2,t3],[t3,t4]", algoid, modelSet),
("i1", "i0,i2,i3", "0.0,0.0,0.0", "[t1,t2,t3],[t4],[t3,t4]", algoid, modelSet),
("i0", "i1,i2,i3", "0.0,0.0,0.0", "[t2,t3],[t4],[t3,t4]", algoid, modelSet))
- test(algoid, modelSet, itypes, 500, items, itemSimScores)
+ test(algoid, modelSet, itypes, 500, largeNumber, items, itemSimScores)
}
// TODO: test with smaller number of numRecommendations (but can't know expected result beacause the score is random...)
+
+ /* test 2: test starttime and endtime */
+ // starttime, endtime
+ // i0 A |---------|
+ // i1 B |---------|E
+ // i2 C|---------|
+ // i3 |---------|
+ // D F G
+
+ val tA = 123122
+ val tB = 123123
+ val tC = 123457
+ val tD = 123679
+ val tE = 543322
+ val tF = 543654
+ val tG = 543655
+
+ val test2Algoid = 12
+ val test2ModelSet = false
+
+ val test2ItypesAll = List("t1", "t2", "t3", "t4")
+ val test2Items = List(
+ ("i0", "t1,t2,t3", "19", "123123", "4", "543210"),
+ ("i1", "t2,t3", "19", "123456", "5", "543321"),
+ ("i2", "t4", "19", "123567", "6", "543432"),
+ ("i3", "t3,t4", "19", "123678", "7", "543654"))
+
+ val test2Users = List(("u0", "3"), ("u1", "3"), ("u2", "3"), ("u3", "3"))
+
+ val test2ItemSimScoresAll = List(
+ ("i0", "i3,i2,i1", "0.0,0.0,0.0", "[t3,t4],[t4],[t2,t3]", test2Algoid, test2ModelSet),
+ ("i1", "i3,i2,i0", "0.0,0.0,0.0", "[t3,t4],[t4],[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("i2", "i3,i1,i0", "0.0,0.0,0.0", "[t3,t4],[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("i3", "i2,i1,i0", "0.0,0.0,0.0", "[t4],[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet))
+
+ val test2ItemSimScoresEmpty = List()
+
+ val test2ItemSimScoresi0 = List(
+ ("i1", "i0", "0.0", "[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("i2", "i0", "0.0", "[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("i3", "i0", "0.0", "[t1,t2,t3]", test2Algoid, test2ModelSet))
+
+ val test2ItemSimScoresi0i1 = List(
+ ("i0", "i1", "0.0", "[t2,t3]", test2Algoid, test2ModelSet),
+ ("i1", "i0", "0.0", "[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("i2", "i1,i0", "0.0,0.0", "[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet),
+ ("i3", "i1,i0", "0.0,0.0", "[t2,t3],[t1,t2,t3]", test2Algoid, test2ModelSet))
+
+ val test2ItemSimScoresi2i3 = List(
+ ("i0", "i3,i2", "0.0,0.0", "[t3,t4],[t4]", test2Algoid, test2ModelSet),
+ ("i1", "i3,i2", "0.0,0.0", "[t3,t4],[t4]", test2Algoid, test2ModelSet),
+ ("i2", "i3", "0.0", "[t3,t4]", test2Algoid, test2ModelSet),
+ ("i3", "i2", "0.0", "[t4]", test2Algoid, test2ModelSet))
+
+ "recommendationTime < all item starttime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tA, test2Items, test2ItemSimScoresEmpty)
+ }
+
+ "recommendationTime == earliest starttime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tB, test2Items, test2ItemSimScoresi0)
+ }
+
+ "recommendationTime > some items starttime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tC, test2Items, test2ItemSimScoresi0i1)
+ }
+
+ "recommendationTime > all item starttime and < all item endtime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tD, test2Items, test2ItemSimScoresAll)
+ }
+
+ "recommendationTime > some item endtime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tE, test2Items, test2ItemSimScoresi2i3)
+ }
+
+ "recommendationTime == last item endtime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tA, test2Items, test2ItemSimScoresEmpty)
+ }
+
+ "recommendationTime > last item endtime" should {
+ test(test2Algoid, test2ModelSet, test2ItypesAll, 500, tA, test2Items, test2ItemSimScoresEmpty)
+ }
+
}
diff --git a/process/engines/itemsim/evaluations/hadoop/scalding/metrics/ismap/src/main/scala/io/prediction/metrics/scalding/itemsim/ismap/ISMAPAtKDataPreparator.scala b/process/engines/itemsim/evaluations/hadoop/scalding/metrics/ismap/src/main/scala/io/prediction/metrics/scalding/itemsim/ismap/ISMAPAtKDataPreparator.scala
index 2df2307..8f62463 100644
--- a/process/engines/itemsim/evaluations/hadoop/scalding/metrics/ismap/src/main/scala/io/prediction/metrics/scalding/itemsim/ismap/ISMAPAtKDataPreparator.scala
+++ b/process/engines/itemsim/evaluations/hadoop/scalding/metrics/ismap/src/main/scala/io/prediction/metrics/scalding/itemsim/ismap/ISMAPAtKDataPreparator.scala
@@ -115,16 +115,37 @@
* TODO: filter out items appeared in trainingU2i?
*/
testU2i
- .filter('actionTest, 'vTest) { fields: (String, String) =>
+ .filter('actionTest, 'vTest) { fields: (String, Option[String]) =>
val (action, v) = fields
goalParamArg match {
case GOAL_VIEW => (action == ACTION_VIEW)
case GOAL_CONVERSION => (action == ACTION_CONVERSION)
case GOAL_LIKE => (action == ACTION_LIKE)
- case GOAL_RATE3 => (action == ACTION_RATE) && (v.toInt >= 3)
- case GOAL_RATE4 => (action == ACTION_RATE) && (v.toInt >= 4)
- case GOAL_RATE5 => (action == ACTION_RATE) && (v.toInt >= 5)
+ case GOAL_RATE3 => try {
+ (action == ACTION_RATE) && (v.get.toInt >= 3)
+ } catch {
+ case e: Exception => {
+ assert(false, s"Failed to convert v field ${v} to int. Exception:" + e)
+ false
+ }
+ }
+ case GOAL_RATE4 => try {
+ (action == ACTION_RATE) && (v.get.toInt >= 4)
+ } catch {
+ case e: Exception => {
+ assert(false, s"Failed to convert v field ${v} to int. Exception:" + e)
+ false
+ }
+ }
+ case GOAL_RATE5 => try {
+ (action == ACTION_RATE) && (v.get.toInt >= 5)
+ } catch {
+ case e: Exception => {
+ assert(false, s"Failed to convert v field ${v} to int. Exception:" + e)
+ false
+ }
+ }
case _ => {
assert(false, "Invalid goalParam " + goalParamArg + ".")
false
diff --git a/process/engines/itemsim/evaluations/hadoop/scalding/metrics/ismap/src/test/scala/io/prediction/metrics/scalding/itemsim/ismap/ISMAPAtKDataPreparatorTest.scala b/process/engines/itemsim/evaluations/hadoop/scalding/metrics/ismap/src/test/scala/io/prediction/metrics/scalding/itemsim/ismap/ISMAPAtKDataPreparatorTest.scala
index a2e80a7..da6ee85 100644
--- a/process/engines/itemsim/evaluations/hadoop/scalding/metrics/ismap/src/test/scala/io/prediction/metrics/scalding/itemsim/ismap/ISMAPAtKDataPreparatorTest.scala
+++ b/process/engines/itemsim/evaluations/hadoop/scalding/metrics/ismap/src/test/scala/io/prediction/metrics/scalding/itemsim/ismap/ISMAPAtKDataPreparatorTest.scala
@@ -79,24 +79,24 @@
val testU2i = List(
// u0
(Rate, "u0", "i0", "123450", "4"),
- (View, "u0", "i1", "123457", "1"),
- (Dislike, "u0", "i2", "123458", "0"),
- (View, "u0", "i3", "123459", "0"),
- (View, "u0", "i7", "123460", "0"),
+ (View, "u0", "i1", "123457", "PIO_NONE"),
+ (Dislike, "u0", "i2", "123458", "PIO_NONE"),
+ (View, "u0", "i3", "123459", "PIO_NONE"),
+ (View, "u0", "i7", "123460", "PIO_NONE"),
// u1
- (View, "u1", "i0", "123457", "2"),
- (Conversion, "u1", "i1", "123458", "0"),
- (Conversion, "u1", "i4", "123457", "0"),
- (Conversion, "u1", "i5", "123456", "0"),
+ (View, "u1", "i0", "123457", "PIO_NONE"),
+ (Conversion, "u1", "i1", "123458", "PIO_NONE"),
+ (Conversion, "u1", "i4", "123457", "PIO_NONE"),
+ (Conversion, "u1", "i5", "123456", "PIO_NONE"),
(Rate, "u1", "i7", "123456", "3"),
(Rate, "u1", "i8", "123454", "3"),
(Rate, "u1", "i9", "123453", "4"),
// u2
- (View, "u2", "i3", "123458", "0"),
- (Conversion, "u2", "i4", "123451", "0"),
- (Conversion, "u2", "i5", "123452", "0"))
+ (View, "u2", "i3", "123458", "PIO_NONE"),
+ (Conversion, "u2", "i4", "123451", "PIO_NONE"),
+ (Conversion, "u2", "i5", "123452", "PIO_NONE"))
"itemsim.ismap ISMAPAtKDataPreparator with goal = view" should {
val params = Map("goalParam" -> "view", "kParam" -> "4")
diff --git a/servers/admin/app/controllers/Application.scala b/servers/admin/app/controllers/Application.scala
index 216cc19..cac2814 100644
--- a/servers/admin/app/controllers/Application.scala
+++ b/servers/admin/app/controllers/Application.scala
@@ -2204,7 +2204,9 @@
"id" -> toJson(engine.id), // engine id
"appid" -> toJson(engine.appid),
"allitemtypes" -> toJson(engine.itypes == None),
- "itemtypelist" -> engine.itypes.map(x => toJson(x.toIterator.toSeq)).getOrElse(JsNull)) ++
+ "itemtypelist" -> engine.itypes.map(x => toJson(x.toIterator.toSeq)).getOrElse(JsNull),
+ "trainingdisabled" -> engine.trainingdisabled.map(toJson(_)).getOrElse(toJson(false)),
+ "trainingschedule" -> engine.trainingschedule.map(toJson(_)).getOrElse(toJson("0 * * * *"))) ++
(params map { case (k, v) => (k, toJson(v.toString)) })))
} getOrElse {
NotFound(toJson(Map("message" -> toJson(s"Invalid EngineInfo ID: ${engine.infoid}"))))
@@ -2216,16 +2218,19 @@
val f = Form(tuple(
"infoid" -> mapOfStringToAny,
"allitemtypes" -> boolean,
- "itemtypelist" -> list(text)))
+ "itemtypelist" -> list(text),
+ "trainingdisabled" -> boolean,
+ "trainingschedule" -> text))
f.bindFromRequest.fold(
e => BadRequest(toJson(Map("message" -> toJson(e.toString)))),
f => {
- val (params, allitemtypes, itemtypelist) = f
+ val (params, allitemtypes, itemtypelist, trainingdisabled, trainingschedule) = f
// NOTE: read-modify-write the original param
val itypes = if (itemtypelist.isEmpty) None else Option(itemtypelist)
val updatedParams = engine.params ++ params - "infoid"
- val updatedEngine = engine.copy(itypes = itypes, params = updatedParams)
+ val updatedEngine = engine.copy(itypes = itypes, params = updatedParams, trainingdisabled = Some(trainingdisabled), trainingschedule = Some(trainingschedule))
engines.update(updatedEngine)
+ WS.url(settingsSchedulerUrl + "/users/" + user.id + "/sync").get()
Ok
})
}
diff --git a/servers/admin/app/views/engines/template.scala.html b/servers/admin/app/views/engines/template.scala.html
index ac42bc8..30fa00d 100644
--- a/servers/admin/app/views/engines/template.scala.html
+++ b/servers/admin/app/views/engines/template.scala.html
@@ -47,6 +47,28 @@
</div>
</div>
</div>
+ <div class="boxBlock">
+ <div class="boxtitle">Training Schedule</div>
+ <div class="boxContent">
+ <p>
+ You may configure this engine's training schedule in this section.
+ The schedule can be specified by a valid "Cron-Expression", which is slightly different from the standard crontab.
+ A description of "Cron-Expression" can be found <a href="http://quartz-scheduler.org/api/2.1.7/org/quartz/CronExpression.html">here</a>.
+ </p>
+ <div class="bottomMargin">
+ <label class="checkbox">
+ <input id="trainingdisabled" <% if (data.trainingdisabled == true) { %> checked <% } %> name="trainingdisabled" type="checkbox" /> Disable scheduled training
+ </label>
+ </div>
+ <div class="control-group">
+ <label for="trainingschedule" class="control-label">Training Schedule</label>
+ <div class="controls">
+ <input id="trainingschedule" name="trainingschedule" class="span2" type="text" value="<%= data.trainingschedule %>" />
+ <span class="help-inline">If empty or invalid, "0 0 * * * ?" (train hourly at 0-minute and 0-second) is assumed.</span>
+ </div>
+ </div>
+ </div>
+ </div>
@Html(content)
</div>
<!-- End of Prediction Settings -->
diff --git a/servers/admin/app/views/engines/template.scala.js b/servers/admin/app/views/engines/template.scala.js
index 24cd4d0..1cf94b0 100644
--- a/servers/admin/app/views/engines/template.scala.js
+++ b/servers/admin/app/views/engines/template.scala.js
@@ -56,6 +56,8 @@
"change #@(p.id)": "@(p.id)Changed",
}
}
+ "change #trainingdisabled" : "trainingdisabledChanged",
+ "change #trainingschedule" : "trainingscheduleChanged",
"change #engineAllItemTypes" : "toggleAllItemTypes"
},
onEnterAddItemType : function(e) {
@@ -126,6 +128,18 @@
inputObj.prop('checked', true); // disallow unselect ALlItemTypes manually
}
},
+ trainingdisabledChanged: function(e) {
+ var trainingdisabled = this.$el.find('#trainingdisabled').is(':checked');
+ this.model.set({trainingdisabled: trainingdisabled});
+ this.model.save();
+ return false;
+ },
+ trainingscheduleChanged: function(e) {
+ var trainingschedule = this.$el.find('#trainingschedule').val();
+ this.model.set({trainingschedule: trainingschedule});
+ this.model.save();
+ return false;
+ },
@for(p <- params) {
@if(p.ui.uitype != "slider") {
@(p.id)Changed: function(e) {
diff --git a/servers/admin/build.sbt b/servers/admin/build.sbt
index 4cf284c..887cb7a 100644
--- a/servers/admin/build.sbt
+++ b/servers/admin/build.sbt
@@ -1,6 +1,6 @@
name := "predictionio-admin"
-version := "0.6.7"
+version := "0.6.8"
organization := "io.prediction"
diff --git a/servers/api/build.sbt b/servers/api/build.sbt
index 3e6e24c..5efe8ca 100644
--- a/servers/api/build.sbt
+++ b/servers/api/build.sbt
@@ -1,6 +1,6 @@
name := "predictionio-api"
-version := "0.6.7"
+version := "0.6.8"
organization := "io.prediction"
diff --git a/servers/scheduler/app/controllers/Jobs.scala b/servers/scheduler/app/controllers/Jobs.scala
index 36ad4a0..7f80d15 100644
--- a/servers/scheduler/app/controllers/Jobs.scala
+++ b/servers/scheduler/app/controllers/Jobs.scala
@@ -103,6 +103,7 @@
val defaultParams = Scheduler.algoInfos.get(alg.infoid) map { _.params.mapValues(_.defaultvalue) } getOrElse Map[String, String]()
command.setAttributes(command.attributes ++ defaultParams ++ alg.params)
command.setAttribute("algoid", alg.id)
+ command.setAttribute("localTempDir", BaseDir.algoDir(config.settingsLocalTempRoot, app.id, engine.id, alg.id, offlineEval.map(_.id)))
command.setAttribute("mahoutTempDir", BaseDir.algoDir(config.settingsHdfsRoot + "mahout_temp/", app.id, engine.id, alg.id, offlineEval.map(_.id)))
command.setAttribute("algoDir", BaseDir.algoDir(config.settingsHdfsRoot, app.id, engine.id, alg.id, offlineEval.map(_.id)))
command.setAttribute("dataFilePrefix", DataFile(config.settingsHdfsRoot, app.id, engine.id, alg.id, offlineEval.map(_.id), ""))
@@ -166,6 +167,7 @@
command.setAttribute("numRecommendations", engine.params.getOrElse("numRecommendations", 500))
command.setAttribute("numSimilarItems", engine.params.getOrElse("numSimilarItems", 500))
command.setAttribute("unseenOnly", engine.params.getOrElse("unseenonly", false))
+ command.setAttribute("recommendationTime", System.currentTimeMillis)
}
}
diff --git a/servers/scheduler/app/controllers/Scheduler.scala b/servers/scheduler/app/controllers/Scheduler.scala
index e035e35..e2a41c6 100644
--- a/servers/scheduler/app/controllers/Scheduler.scala
+++ b/servers/scheduler/app/controllers/Scheduler.scala
@@ -6,7 +6,9 @@
import play.api._
import play.api.libs.json._
import play.api.mvc._
-import org.quartz.impl.matchers.GroupMatcher._;
+import org.quartz.CronExpression
+import org.quartz.CronScheduleBuilder.cronSchedule
+import org.quartz.impl.matchers.GroupMatcher._
import org.quartz.impl.StdSchedulerFactory
import org.quartz.JobBuilder.newJob
import org.quartz.JobKey.jobKey
@@ -162,15 +164,27 @@
} getOrElse {
Logger.info(s"${logPrefix}Giving up setting up batch algo job because it does not have any batch command")
}
- } else if (scheduler.checkExists(triggerkey) == false) {
- Logger.info(s"${logPrefix}Setting up batch algo job (run every hour from now)")
- algoinfo.batchcommands map { batchcommands =>
- val job = Jobs.algoJob(config, app, engine, algo, batchcommands)
- scheduler.addJob(job, true)
- val trigger = newTrigger() forJob (jobKey(algoid, Jobs.algoJobGroup)) withIdentity (algoid, Jobs.algoJobGroup) startNow () withSchedule (simpleSchedule() withIntervalInHours (1) repeatForever ()) build ()
- scheduler.scheduleJob(trigger)
- } getOrElse {
- Logger.info(s"${logPrefix}Giving up setting up batch algo job because it does not have any batch command")
+ } else {
+ if (scheduler.checkExists(triggerkey)) {
+ Logger.info(s"${logPrefix}Resetting existing trigger")
+ scheduler.unscheduleJob(triggerkey)
+ }
+ val trainingdisabled = engine.trainingdisabled.getOrElse(false)
+ if (trainingdisabled) {
+ Logger.info(s"${logPrefix}Training disabled")
+ } else {
+ // Append a 0 in front since Quartz support granularity to seconds
+ val trainingscheduleUnvalidated = engine.trainingschedule.getOrElse("0 0 * * * ?")
+ val trainingschedule = if (CronExpression.isValidExpression(trainingscheduleUnvalidated)) trainingscheduleUnvalidated else "0 0 * * * ?"
+ Logger.info(s"${logPrefix}Setting up batch algo job with schedule ${trainingschedule}")
+ algoinfo.batchcommands map { batchcommands =>
+ val job = Jobs.algoJob(config, app, engine, algo, batchcommands)
+ scheduler.addJob(job, true)
+ val trigger = newTrigger() forJob (jobKey(algoid, Jobs.algoJobGroup)) withIdentity (algoid, Jobs.algoJobGroup) startNow () withSchedule (cronSchedule(trainingschedule) withMisfireHandlingInstructionFireAndProceed ()) build ()
+ scheduler.scheduleJob(trigger)
+ } getOrElse {
+ Logger.info(s"${logPrefix}Giving up setting up batch algo job because it does not have any batch command")
+ }
}
}
} else {
diff --git a/servers/scheduler/build.sbt b/servers/scheduler/build.sbt
index 84d4f13..1cbf303 100644
--- a/servers/scheduler/build.sbt
+++ b/servers/scheduler/build.sbt
@@ -1,6 +1,6 @@
name := "predictionio-scheduler"
-version := "0.6.7"
+version := "0.6.8"
organization := "io.prediction"
diff --git a/servers/scheduler/conf/application.conf b/servers/scheduler/conf/application.conf
index b181aff..3063136 100644
--- a/servers/scheduler/conf/application.conf
+++ b/servers/scheduler/conf/application.conf
@@ -86,28 +86,28 @@
io.prediction.commons.settings.db.port=27017
# PredictionIO Algorithms
-pdio-knnitembased.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.7.jar
-pdio-latestrank.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.7.jar
-pdio-randomrank.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.7.jar
-mahout-itembased.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.7.jar
-mahout-parallelals.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.7.jar
-mahout-knnuserbased.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.7.jar
-mahout-thresholduserbased.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.7.jar
-mahout-slopeone.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.7.jar
-mahout-alswr.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.7.jar
-mahout-svdsgd.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.7.jar
-mahout-svdplusplus.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.7.jar
+pdio-knnitembased.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.8.jar
+pdio-latestrank.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.8.jar
+pdio-randomrank.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.8.jar
+mahout-itembased.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.8.jar
+mahout-parallelals.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.8.jar
+mahout-knnuserbased.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.8.jar
+mahout-thresholduserbased.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.8.jar
+mahout-slopeone.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.8.jar
+mahout-alswr.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.8.jar
+mahout-svdsgd.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.8.jar
+mahout-svdplusplus.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.8.jar
-pdio-itemsimcf.jar=${io.prediction.itemsim.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemSim-Algorithms-Hadoop-Scalding-assembly-0.6.7.jar
-pdio-itemsimlatestrank.jar=${io.prediction.itemsim.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemSim-Algorithms-Hadoop-Scalding-assembly-0.6.7.jar
-pdio-itemsimrandomrank.jar=${io.prediction.itemsim.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemSim-Algorithms-Hadoop-Scalding-assembly-0.6.7.jar
-mahout-itemsimcf.jar=${io.prediction.itemsim.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemSim-Algorithms-Hadoop-Scalding-assembly-0.6.7.jar
+pdio-itemsimcf.jar=${io.prediction.itemsim.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemSim-Algorithms-Hadoop-Scalding-assembly-0.6.8.jar
+pdio-itemsimlatestrank.jar=${io.prediction.itemsim.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemSim-Algorithms-Hadoop-Scalding-assembly-0.6.8.jar
+pdio-itemsimrandomrank.jar=${io.prediction.itemsim.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemSim-Algorithms-Hadoop-Scalding-assembly-0.6.8.jar
+mahout-itemsimcf.jar=${io.prediction.itemsim.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemSim-Algorithms-Hadoop-Scalding-assembly-0.6.8.jar
# PredictionIO generic scalding job
-io.prediction.algorithms.scalding.itemrec.generic.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.7.jar
+io.prediction.algorithms.scalding.itemrec.generic.jar=${io.prediction.itemrec.base}/algorithms/hadoop/scalding/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Hadoop-Scalding-assembly-0.6.8.jar
# Itemrec Scala Mahout Algorithms
-io.prediction.algorithms.mahout.itemrec.jar=${io.prediction.itemrec.base}/algorithms/scala/mahout/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Scala-Mahout-assembly-0.6.7.jar
+io.prediction.algorithms.mahout.itemrec.jar=${io.prediction.itemrec.base}/algorithms/scala/mahout/target/scala-2.10/PredictionIO-Process-ItemRec-Algorithms-Scala-Mahout-assembly-0.6.8.jar
# Mahout core job
io.prediction.algorithms.mahout-core-job.jar=${io.prediction.base}/vendors/mahout-distribution-0.8/mahout-core-0.8-job.jar
diff --git a/tools/softwaremanager/src/main/scala/io/prediction/tools/softwaremanager/Backup.scala b/tools/softwaremanager/src/main/scala/io/prediction/tools/softwaremanager/Backup.scala
index 21387e6..fa1f1f0 100644
--- a/tools/softwaremanager/src/main/scala/io/prediction/tools/softwaremanager/Backup.scala
+++ b/tools/softwaremanager/src/main/scala/io/prediction/tools/softwaremanager/Backup.scala
@@ -27,7 +27,7 @@
def main(args: Array[String]) {
val parser = new scopt.OptionParser[BackupConfig]("backup") {
- head("PredictionIO Backup Utility", "0.6.7")
+ head("PredictionIO Backup Utility", "0.6.8")
help("help") text ("prints this usage text")
arg[String]("<backup directory>") action { (x, c) =>
c.copy(backupDir = x)
diff --git a/tools/softwaremanager/src/main/scala/io/prediction/tools/softwaremanager/Restore.scala b/tools/softwaremanager/src/main/scala/io/prediction/tools/softwaremanager/Restore.scala
index ecb9f77..9f492d9 100644
--- a/tools/softwaremanager/src/main/scala/io/prediction/tools/softwaremanager/Restore.scala
+++ b/tools/softwaremanager/src/main/scala/io/prediction/tools/softwaremanager/Restore.scala
@@ -27,7 +27,7 @@
def main(args: Array[String]) {
val parser = new scopt.OptionParser[RestoreConfig]("restore") {
- head("PredictionIO Restore Utility", "0.6.7")
+ head("PredictionIO Restore Utility", "0.6.8")
help("help") text ("prints this usage text")
opt[Unit]("upgrade") action { (_, c) =>
c.copy(upgrade = true)
diff --git a/tools/softwaremanager/src/main/scala/io/prediction/tools/softwaremanager/UpdateCheck.scala b/tools/softwaremanager/src/main/scala/io/prediction/tools/softwaremanager/UpdateCheck.scala
index 993a1a1..2ee0eb3 100644
--- a/tools/softwaremanager/src/main/scala/io/prediction/tools/softwaremanager/UpdateCheck.scala
+++ b/tools/softwaremanager/src/main/scala/io/prediction/tools/softwaremanager/UpdateCheck.scala
@@ -16,7 +16,7 @@
def main(args: Array[String]) {
val parser = new scopt.OptionParser[UpdateCheckConfig]("updatecheck") {
- head("PredictionIO Update Checker", "0.6.7")
+ head("PredictionIO Update Checker", "0.6.8")
help("help") text ("prints this usage text")
opt[String]("localVersion") action { (x, c) =>
c.copy(localVersion = x)
diff --git a/tools/softwaremanager/src/main/scala/io/prediction/tools/softwaremanager/Upgrade.scala b/tools/softwaremanager/src/main/scala/io/prediction/tools/softwaremanager/Upgrade.scala
index e492318..a8d05c4 100644
--- a/tools/softwaremanager/src/main/scala/io/prediction/tools/softwaremanager/Upgrade.scala
+++ b/tools/softwaremanager/src/main/scala/io/prediction/tools/softwaremanager/Upgrade.scala
@@ -17,7 +17,7 @@
/** Upgrades previous version to current version. */
object Upgrade {
def main(args: Array[String]) {
- val thisVersion = "0.6.7"
+ val thisVersion = "0.6.8"
val parser = new scopt.OptionParser[UpgradeConfig]("upgrade") {
head("PredictionIO Software Upgrade Utility", thisVersion)
help("help") text ("prints this usage text")