Merge branch 'develop'
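
Update the classification template for PredictionIO 0.9.2: event data is
now read through the new PEventStore API instead of Storage.getPEvents(),
and the app is identified by appName instead of appId in engine.json,
DataSource.scala, and Evaluation.scala.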
diff --git a/README.md b/README.md
index 84bab22..02bd884 100644
--- a/README.md
+++ b/README.md
@@ -6,9 +6,16 @@
## Versions
+### v0.3.0
+
+- update for PredictionIO 0.9.2, including:
+
+ - use new PEventStore API
+ - use appName in DataSource parameter
+
### v0.2.0
-- Update for PredictionIO 0.9.2
+- update build.sbt and template.json for PredictionIO 0.9.2
### v0.1.4
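
For reference, the v0.3.0 change listed above amounts to the following
(a minimal sketch, not part of this patch; `dsp` stands for the template's
DataSourceParams instance and `sc` for the SparkContext):

    // Before (v0.2.0): events looked up through Storage, app identified by ID.
    //   val eventsDb = Storage.getPEvents()
    //   eventsDb.aggregateProperties(appId = dsp.appId, entityType = "user")(sc)

    // After (v0.3.0): the PEventStore facade, app identified by name.
    import io.prediction.data.store.PEventStore
    PEventStore.aggregateProperties(
      appName = dsp.appName,  // resolved to the underlying app ID at runtime
      entityType = "user"
    )(sc)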
diff --git a/engine.json b/engine.json
index d20a2ad..833c831 100644
--- a/engine.json
+++ b/engine.json
@@ -4,7 +4,7 @@
"engineFactory": "org.template.classification.ClassificationEngine",
"datasource": {
"params": {
- "appId": 2
+ "appName": "INVALID_APP_NAME"
}
},
"algorithms": [
diff --git a/src/main/scala/DataSource.scala b/src/main/scala/DataSource.scala
index c9b8b82..23a47a9 100644
--- a/src/main/scala/DataSource.scala
+++ b/src/main/scala/DataSource.scala
@@ -5,7 +5,7 @@
import io.prediction.controller.EmptyActualResult
import io.prediction.controller.Params
import io.prediction.data.storage.Event
-import io.prediction.data.storage.Storage
+import io.prediction.data.store.PEventStore
import org.apache.spark.SparkContext
import org.apache.spark.SparkContext._
@@ -16,7 +16,7 @@
import grizzled.slf4j.Logger
case class DataSourceParams(
- appId: Int,
+ appName: String,
evalK: Option[Int] // defines the k-fold parameter.
) extends Params
@@ -28,9 +28,9 @@
override
def readTraining(sc: SparkContext): TrainingData = {
- val eventsDb = Storage.getPEvents()
- val labeledPoints: RDD[LabeledPoint] = eventsDb.aggregateProperties(
- appId = dsp.appId,
+
+ val labeledPoints: RDD[LabeledPoint] = PEventStore.aggregateProperties(
+ appName = dsp.appName,
entityType = "user",
// only keep entities with these required properties defined
required = Some(List("plan", "attr0", "attr1", "attr2")))(sc)
@@ -67,9 +67,8 @@
// illustration purposes; a recommended approach is to factor out this logic
// into a helper function and have both readTraining and readEval call the
// helper.
- val eventsDb = Storage.getPEvents()
- val labeledPoints: RDD[LabeledPoint] = eventsDb.aggregateProperties(
- appId = dsp.appId,
+ val labeledPoints: RDD[LabeledPoint] = PEventStore.aggregateProperties(
+ appName = dsp.appName,
entityType = "user",
// only keep entities with these required properties defined
required = Some(List("plan", "attr0", "attr1", "attr2")))(sc)
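
As the comment above suggests, the duplicated read logic could be factored
out into a helper that both readTraining and readEval call. A possible
shape, sketched under the assumption that it lives inside this template's
DataSource class ("plan" is the label and attr0..attr2 the features, as in
the required-properties list above):

    import io.prediction.data.store.PEventStore
    import org.apache.spark.SparkContext
    import org.apache.spark.mllib.linalg.Vectors
    import org.apache.spark.mllib.regression.LabeledPoint
    import org.apache.spark.rdd.RDD

    // Hypothetical helper: aggregate the required user properties and turn
    // each entity's property map into a LabeledPoint.
    private def readLabeledPoints(sc: SparkContext): RDD[LabeledPoint] =
      PEventStore.aggregateProperties(
        appName = dsp.appName,
        entityType = "user",
        required = Some(List("plan", "attr0", "attr1", "attr2"))
      )(sc).map { case (_, properties) =>
        LabeledPoint(
          properties.get[Double]("plan"),
          Vectors.dense(Array(
            properties.get[Double]("attr0"),
            properties.get[Double]("attr1"),
            properties.get[Double]("attr2")
          ))
        )
      }.cache()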
@@ -98,15 +97,15 @@
val evalK = dsp.evalK.get
val indexedPoints: RDD[(LabeledPoint, Long)] = labeledPoints.zipWithIndex
- (0 until evalK).map { idx =>
+ (0 until evalK).map { idx =>
val trainingPoints = indexedPoints.filter(_._2 % evalK != idx).map(_._1)
val testingPoints = indexedPoints.filter(_._2 % evalK == idx).map(_._1)
(
new TrainingData(trainingPoints),
new EmptyEvaluationInfo(),
- testingPoints.map {
- p => (new Query(p.features.toArray), new ActualResult(p.label))
+ testingPoints.map {
+ p => (new Query(p.features.toArray), new ActualResult(p.label))
}
)
}
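
The modulo split above assigns point i to the test set of fold i % evalK,
so every point is tested exactly once. A plain-Scala illustration (no
Spark; hypothetical values):

    val evalK = 3
    val indexed = Seq("a", "b", "c", "d", "e", "f").zipWithIndex
    val folds = (0 until evalK).map { idx =>
      (indexed.filter(_._2 % evalK != idx).map(_._1),  // training points
       indexed.filter(_._2 % evalK == idx).map(_._1))  // testing points
    }
    // folds(0) == (List(b, c, e, f), List(a, d)); folds 1 and 2 rotate.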
diff --git a/src/main/scala/Evaluation.scala b/src/main/scala/Evaluation.scala
index ec8e16c..a9722f3 100644
--- a/src/main/scala/Evaluation.scala
+++ b/src/main/scala/Evaluation.scala
@@ -24,7 +24,7 @@
// the data is read, and an evalK parameter is used to define the
// cross-validation.
private[this] val baseEP = EngineParams(
- dataSourceParams = DataSourceParams(appId = 19, evalK = Some(5)))
+ dataSourceParams = DataSourceParams(appName = "INVALID_APP_NAME", evalK = Some(5)))
// Second, we specify the engine params list by explicitly listing all
// algorithm parameters. In this case, we evaluate 3 engine params, each with
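
As in engine.json, "INVALID_APP_NAME" must be replaced with a real app
name before the evaluation is run. Assuming this template's usual
AccuracyEvaluation and EngineParamsList objects (defined later in this
file, outside the hunk), the evaluation would be launched with:

    pio eval org.template.classification.AccuracyEvaluation \
      org.template.classification.EngineParamsList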