Merge pull request #4 from vngrs/develop
Fix unknown item filter
diff --git a/src/main/scala/CooccurrenceAlgorithm.scala b/src/main/scala/CooccurrenceAlgorithm.scala
index f94dd7e..54ff741 100644
--- a/src/main/scala/CooccurrenceAlgorithm.scala
+++ b/src/main/scala/CooccurrenceAlgorithm.scala
@@ -58,8 +58,10 @@
val userItem = events
// map item from string to integer index
- .map ( v => (v.user, itemStringIntMap.getOrElse(v.item, 1)) )
- .filter { case (user, item) => item != -1 }
+ .flatMap {
+ case ViewEvent(user, item, _) if itemStringIntMap.contains(item) => Some(user, itemStringIntMap(item))
+ case _ => None
+ }
// if user view same item multiple times, only count as once
.distinct()
.cache()
diff --git a/src/test/scala/CooccurenceAlgorithmTest.scala b/src/test/scala/CooccurenceAlgorithmTest.scala
index 572844e..8a8e0b4 100644
--- a/src/test/scala/CooccurenceAlgorithmTest.scala
+++ b/src/test/scala/CooccurenceAlgorithmTest.scala
@@ -35,7 +35,9 @@
ViewEvent("u5", "i0", 1000040),
ViewEvent("u5", "i1", 1000040),
ViewEvent("u6", "i0", 1000040),
- ViewEvent("u6", "i1", 1000040)
+ ViewEvent("u6", "i1", 1000040),
+ ViewEvent("u7", "i4", 1000050), //Unknown item
+ ViewEvent("u7", "i3", 1000050)
)
"trainCooccurrence" should "return top 10 correctly" in {