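Fix DataSource to read events with entity type "content" and event name "e-mail", and to map the label "spam" to 1.0 for the tutorial data.
Fix the default algorithm parameters in engine.json (the "lr" algorithm now sets only regParam).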
diff --git a/README.md b/README.md
index 5b27591..75efa05 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,15 @@
-# TextManipulationEngine
+# Text Classification Engine
Look at the following [tutorial](https://docs.prediction.io/demo/textclassification/) for a Quick Start guide and implementation details.
# Release Information
+## Version 3.1
+
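+- Fix DataSource to read events with entity type "content" and event name "e-mail", and to map the label "spam" to 1.0 for the tutorial data.
+- Fix the default algorithm parameters in engine.json (the "lr" algorithm now sets only regParam).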
+
+
## Version 2.2
Modified PreparedData to use MLLib hashing and tf-idf implementations.
@@ -20,7 +26,6 @@
Fixed import script bug occuring with Python 2.
-## Version 1.1 Changes
+## Version 1.1 Changes
Changed data import Python script to pull straight from the [20 newsgroups](http://qwone.com/~jason/20Newsgroups/) page.
-
diff --git a/engine.json b/engine.json
index 478fa87..a6158ba 100644
--- a/engine.json
+++ b/engine.json
@@ -1,4 +1,4 @@
- {
+{
"id": "default",
"description": "Default settings",
"engineFactory": "org.template.textclassification.TextClassificationEngine",
@@ -18,13 +18,7 @@
{
"name": "lr",
"params": {
- "maxIter": 1,
- "regParam": 0.00000005,
- "stepSize": 5.0,
- "bitPrecision": 22,
- "modelName": "model.vw",
- "namespace": "n",
- "ngram": 1
+ "regParam": 0.00000005
}
}
]
diff --git a/src/main/scala/org/template/textclassification/DataSource.scala b/src/main/scala/org/template/textclassification/DataSource.scala
index fe5c056..6a29909 100644
--- a/src/main/scala/org/template/textclassification/DataSource.scala
+++ b/src/main/scala/org/template/textclassification/DataSource.scala
@@ -39,15 +39,15 @@
//Get RDD of Events.
PEventStore.find(
appName = dsp.appName,
- entityType = Some("user"), // specify data entity type
- eventNames = Some(List("$set")) // specify data event name
+ entityType = Some("content"), // specify data entity type
+ eventNames = Some(List("e-mail")) // specify data event name
// Convert collected RDD of events to and RDD of Observation
// objects.
)(sc).map(e => {
val label : String = e.properties.get[String]("label")
Observation(
- if (label == "1") 1.0 else 0.0,
+ if (label == "spam") 1.0 else 0.0,
e.properties.get[String]("text"),
label
)