Merge branch 'develop'
diff --git a/README.md b/README.md
index cdade24..766cae6 100644
--- a/README.md
+++ b/README.md
@@ -6,11 +6,16 @@
## Versions
+### v0.3.2
+
+- Fix CooccurrenceAlgorithm handling of unknown item ids (view events for unknown items are now ignored)
+
### v0.3.1
- Add CooccurrenceAlgorithm.
- To use this algorithm, override engine.json by engine-cooccurrence.json,
- or specify `--variant engine-cooccurrence.json` parameter for both pio train **and** deploy
+ To use this algorithm, replace `engine.json` with `engine-cooccurrence.json`,
+ or pass the `--variant engine-cooccurrence.json` parameter to both `pio train` **and**
+ `pio deploy`.
### v0.3.0
diff --git a/src/main/scala/CooccurrenceAlgorithm.scala b/src/main/scala/CooccurrenceAlgorithm.scala
index f94dd7e..3834ead 100644
--- a/src/main/scala/CooccurrenceAlgorithm.scala
+++ b/src/main/scala/CooccurrenceAlgorithm.scala
@@ -58,8 +58,11 @@
val userItem = events
// map item from string to integer index
- .map ( v => (v.user, itemStringIntMap.getOrElse(v.item, 1)) )
- .filter { case (user, item) => item != -1 }
+ .flatMap {
+ case ViewEvent(user, item, _) if itemStringIntMap.contains(item) =>
+ Some(user, itemStringIntMap(item))
+ case _ => None
+ }
// if user view same item multiple times, only count as once
.distinct()
.cache()
diff --git a/src/test/scala/CooccurenceAlgorithmTest.scala b/src/test/scala/CooccurenceAlgorithmTest.scala
index 572844e..8a8e0b4 100644
--- a/src/test/scala/CooccurenceAlgorithmTest.scala
+++ b/src/test/scala/CooccurenceAlgorithmTest.scala
@@ -35,7 +35,9 @@
ViewEvent("u5", "i0", 1000040),
ViewEvent("u5", "i1", 1000040),
ViewEvent("u6", "i0", 1000040),
- ViewEvent("u6", "i1", 1000040)
+ ViewEvent("u6", "i1", 1000040),
+ ViewEvent("u7", "i4", 1000050), //Unknown item
+ ViewEvent("u7", "i3", 1000050)
)
"trainCooccurrence" should "return top 10 correctly" in {