Merge pull request #4 from vngrs/develop

Fix unknown item filter: drop view events whose item is missing from itemStringIntMap instead of mapping them to index 1
diff --git a/src/main/scala/CooccurrenceAlgorithm.scala b/src/main/scala/CooccurrenceAlgorithm.scala
index f94dd7e..54ff741 100644
--- a/src/main/scala/CooccurrenceAlgorithm.scala
+++ b/src/main/scala/CooccurrenceAlgorithm.scala
@@ -58,8 +58,10 @@
 
     val userItem = events
       // map item from string to integer index
-      .map ( v => (v.user, itemStringIntMap.getOrElse(v.item, 1)) )
-      .filter { case (user, item) => item != -1 }
+      .flatMap {
+        case ViewEvent(user, item, _) if itemStringIntMap.contains(item) => Some(user, itemStringIntMap(item))
+        case _ => None
+      }
       // if user view same item multiple times, only count as once
       .distinct()
       .cache()
diff --git a/src/test/scala/CooccurenceAlgorithmTest.scala b/src/test/scala/CooccurenceAlgorithmTest.scala
index 572844e..8a8e0b4 100644
--- a/src/test/scala/CooccurenceAlgorithmTest.scala
+++ b/src/test/scala/CooccurenceAlgorithmTest.scala
@@ -35,7 +35,9 @@
     ViewEvent("u5", "i0", 1000040),
     ViewEvent("u5", "i1", 1000040),
     ViewEvent("u6", "i0", 1000040),
-    ViewEvent("u6", "i1", 1000040)
+    ViewEvent("u6", "i1", 1000040),
+    ViewEvent("u7", "i4", 1000050), //Unknown item
+    ViewEvent("u7", "i3", 1000050)
   )
 
   "trainCooccurrence" should "return top 10 correctly" in {