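Slight fix: replace `cutoff` with `inverseIdfMin`/`inverseIdfMax` in PreparatorParams and PreparedData; set `inverseIdfMin` to 0 in engine.json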
diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index 22476a9..3289a48 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -5,7 +5,6 @@
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/src/main/scala/org/template/textclassification/Preparator.scala" afterPath="$PROJECT_DIR$/src/main/scala/org/template/textclassification/Preparator.scala" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/src/main/scala/org/template/textclassification/PreparedData.scala" afterPath="$PROJECT_DIR$/src/main/scala/org/template/textclassification/PreparedData.scala" />
<change type="MODIFICATION" beforePath="$PROJECT_DIR$/engine.json" afterPath="$PROJECT_DIR$/engine.json" />
- <change type="MODIFICATION" beforePath="$PROJECT_DIR$/data/import_eventserver.py" afterPath="$PROJECT_DIR$/data/import_eventserver.py" />
</list>
<ignored path="TextManipulationEngine.iws" />
<ignored path=".idea/workspace.xml" />
@@ -49,11 +48,11 @@
</provider>
</entry>
</file>
- <file leaf-file-name="PreparedData.scala" pinned="false" current-in-tab="true">
+ <file leaf-file-name="PreparedData.scala" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/src/main/scala/org/template/textclassification/PreparedData.scala">
<provider selected="true" editor-type-id="text-editor">
- <state vertical-scroll-proportion="0.80261594">
- <caret line="65" column="23" selection-start-line="65" selection-start-column="23" selection-end-line="65" selection-end-column="23" />
+ <state vertical-scroll-proportion="0.0">
+ <caret line="67" column="28" selection-start-line="67" selection-start-column="28" selection-end-line="67" selection-end-column="28" />
<folding />
</state>
</provider>
@@ -63,7 +62,7 @@
<entry file="file://$PROJECT_DIR$/src/main/scala/org/template/textclassification/Preparator.scala">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0">
- <caret line="34" column="0" selection-start-line="34" selection-start-column="0" selection-end-line="34" selection-end-column="0" />
+ <caret line="12" column="11" selection-start-line="12" selection-start-column="11" selection-end-line="12" selection-end-column="11" />
<folding />
</state>
</provider>
@@ -113,7 +112,7 @@
<entry file="file://$PROJECT_DIR$/src/main/scala/org/template/textclassification/DataSource.scala">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0">
- <caret line="106" column="11" selection-start-line="106" selection-start-column="11" selection-end-line="106" selection-end-column="11" />
+ <caret line="109" column="18" selection-start-line="109" selection-start-column="18" selection-end-line="109" selection-end-column="18" />
<folding />
</state>
</provider>
@@ -129,11 +128,11 @@
</provider>
</entry>
</file>
- <file leaf-file-name="engine.json" pinned="false" current-in-tab="false">
+ <file leaf-file-name="engine.json" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/engine.json">
<provider selected="true" editor-type-id="text-editor">
- <state vertical-scroll-proportion="-16.25">
- <caret line="26" column="1" selection-start-line="26" selection-start-column="1" selection-end-line="26" selection-end-column="1" />
+ <state vertical-scroll-proportion="0.33047736">
+ <caret line="18" column="17" selection-start-line="18" selection-start-column="17" selection-end-line="18" selection-end-column="17" />
<folding />
</state>
</provider>
@@ -159,10 +158,10 @@
<option value="$PROJECT_DIR$/src/main/scala/org/template/textclassification/Engine.scala" />
<option value="$PROJECT_DIR$/src/main/scala/org/template/textclassification/DataSource.scala" />
<option value="$PROJECT_DIR$/src/main/scala/org/template/textclassification/Evaluation.scala" />
- <option value="$PROJECT_DIR$/engine.json" />
<option value="$PROJECT_DIR$/data/import_eventserver.py" />
- <option value="$PROJECT_DIR$/src/main/scala/org/template/textclassification/Preparator.scala" />
<option value="$PROJECT_DIR$/src/main/scala/org/template/textclassification/PreparedData.scala" />
+ <option value="$PROJECT_DIR$/src/main/scala/org/template/textclassification/Preparator.scala" />
+ <option value="$PROJECT_DIR$/engine.json" />
</list>
</option>
</component>
@@ -918,12 +917,12 @@
<option name="number" value="Default" />
<updated>1430951117412</updated>
<workItem from="1430951119557" duration="5853000" />
- <workItem from="1430981904623" duration="9414000" />
+ <workItem from="1430981904623" duration="9980000" />
</task>
<servers />
</component>
<component name="TimeTrackingManager">
- <option name="totallyTimeSpent" value="15267000" />
+ <option name="totallyTimeSpent" value="15833000" />
</component>
<component name="ToolWindowManager">
<frame x="2436" y="22" width="1305" height="938" extended-state="0" />
@@ -1096,14 +1095,6 @@
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/src/main/scala/org/template/textclassification/Engine.scala">
- <provider selected="true" editor-type-id="text-editor">
- <state vertical-scroll-proportion="0.0">
- <caret line="26" column="6" selection-start-line="26" selection-start-column="6" selection-end-line="26" selection-end-column="6" />
- <folding />
- </state>
- </provider>
- </entry>
<entry file="file://$PROJECT_DIR$/src/main/scala/org/template/textclassification/NBModel.scala">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0">
@@ -1122,18 +1113,10 @@
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/engine.json">
- <provider selected="true" editor-type-id="text-editor">
- <state vertical-scroll-proportion="-16.25">
- <caret line="26" column="1" selection-start-line="26" selection-start-column="1" selection-end-line="26" selection-end-column="1" />
- <folding />
- </state>
- </provider>
- </entry>
- <entry file="file://$PROJECT_DIR$/src/main/scala/org/template/textclassification/DataSource.scala">
+ <entry file="file://$PROJECT_DIR$/src/main/scala/org/template/textclassification/PreparedData.scala">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0">
- <caret line="106" column="11" selection-start-line="106" selection-start-column="11" selection-end-line="106" selection-end-column="11" />
+ <caret line="67" column="28" selection-start-line="67" selection-start-column="28" selection-end-line="67" selection-end-column="28" />
<folding />
</state>
</provider>
@@ -1141,15 +1124,31 @@
<entry file="file://$PROJECT_DIR$/src/main/scala/org/template/textclassification/Preparator.scala">
<provider selected="true" editor-type-id="text-editor">
<state vertical-scroll-proportion="0.0">
- <caret line="34" column="0" selection-start-line="34" selection-start-column="0" selection-end-line="34" selection-end-column="0" />
+ <caret line="12" column="11" selection-start-line="12" selection-start-column="11" selection-end-line="12" selection-end-column="11" />
<folding />
</state>
</provider>
</entry>
- <entry file="file://$PROJECT_DIR$/src/main/scala/org/template/textclassification/PreparedData.scala">
+ <entry file="file://$PROJECT_DIR$/src/main/scala/org/template/textclassification/Engine.scala">
<provider selected="true" editor-type-id="text-editor">
- <state vertical-scroll-proportion="0.80261594">
- <caret line="65" column="23" selection-start-line="65" selection-start-column="23" selection-end-line="65" selection-end-column="23" />
+ <state vertical-scroll-proportion="0.0">
+ <caret line="26" column="6" selection-start-line="26" selection-start-column="6" selection-end-line="26" selection-end-column="6" />
+ <folding />
+ </state>
+ </provider>
+ </entry>
+ <entry file="file://$PROJECT_DIR$/src/main/scala/org/template/textclassification/DataSource.scala">
+ <provider selected="true" editor-type-id="text-editor">
+ <state vertical-scroll-proportion="0.0">
+ <caret line="109" column="18" selection-start-line="109" selection-start-column="18" selection-end-line="109" selection-end-column="18" />
+ <folding />
+ </state>
+ </provider>
+ </entry>
+ <entry file="file://$PROJECT_DIR$/engine.json">
+ <provider selected="true" editor-type-id="text-editor">
+ <state vertical-scroll-proportion="0.33047736">
+ <caret line="18" column="17" selection-start-line="18" selection-start-column="17" selection-end-line="18" selection-end-column="17" />
<folding />
</state>
</provider>
diff --git a/engine.json b/engine.json
index 037aafd..6369cc7 100644
--- a/engine.json
+++ b/engine.json
@@ -12,7 +12,7 @@
"params": {
"nMin": 1,
"nMax": 2,
- "inverseIdfMin" : 0.1,
+ "inverseIdfMin" : 0,
"inverseIdfMax" : 0.9
}
},
diff --git a/src/main/scala/org/template/textclassification/Preparator.scala b/src/main/scala/org/template/textclassification/Preparator.scala
index 5275f3a..5c9f7f1 100644
--- a/src/main/scala/org/template/textclassification/Preparator.scala
+++ b/src/main/scala/org/template/textclassification/Preparator.scala
@@ -13,7 +13,8 @@
case class PreparatorParams(
nMin: Int,
nMax: Int,
- cutoff : Double
+ inverseIdfMin : Double,
+ inverseIdfMax : Double
) extends Params
@@ -24,7 +25,7 @@
// Prepare your training data.
def prepare(sc : SparkContext, td: TrainingData): PreparedData = {
- new PreparedData(td, pp.nMin, pp.nMax, pp.cutoff)
+ new PreparedData(td, pp.nMin, pp.nMax, pp.inverseIdfMin, pp. inverseIdfMax)
}
}
diff --git a/src/main/scala/org/template/textclassification/PreparedData.scala b/src/main/scala/org/template/textclassification/PreparedData.scala
index 717c659..04409f6 100644
--- a/src/main/scala/org/template/textclassification/PreparedData.scala
+++ b/src/main/scala/org/template/textclassification/PreparedData.scala
@@ -24,7 +24,8 @@
val td: TrainingData,
val nMin: Int,
val nMax: Int,
- val cutoff : Double
+ val inverseIdfMin : Double,
+ val inverseIdfMax : Double
) extends Serializable {