Fix missing unk vector (#35)

diff --git a/tf-ner-poc/src/main/python/namefinder/namefinder.py b/tf-ner-poc/src/main/python/namefinder/namefinder.py
index e7fe436..548a3a9 100644
--- a/tf-ner-poc/src/main/python/namefinder/namefinder.py
+++ b/tf-ner-poc/src/main/python/namefinder/namefinder.py
@@ -29,6 +29,10 @@
 import os
 from tempfile import TemporaryDirectory
 
+# Module-level constants: placeholder tokens for unknown words and for numbers
+__UNK__ = '__UNK__'
+__NUM__ = '__NUM__'
+
 
 # Parse the OpenNLP Name Finder format into begin, end, type triples
 class NameSample:
@@ -87,7 +91,7 @@
                 if word_dict.get(token) is not None:
                     vector = word_dict[token]
                 else:
-                    vector = word_dict['__UNK__']
+                    vector = word_dict[__UNK__]
 
                 sentence.append(vector)
 
@@ -103,7 +107,7 @@
             labels.append(label)
 
             for label_string in label:
-                if not label_string in self.label_dict:
+                if label_string not in self.label_dict:
                     self.label_dict[label_string] = len(self.label_dict)
 
         return sentences, labels, chars_set
@@ -350,12 +354,20 @@
                 vector_size = len(parts) - 1
 
             if len(parts) != vector_size + 1:
-                # print("Bad Vector: ",len(line),len(parts), line)
                 raise VectorException("Bad Vector in line: {}, size: {} vector: {}".format(len(line), len(parts), line))
                 continue
             word_dict[parts[0]] = len(word_dict)
             embeddings.append(np.array(parts[1:], dtype=np.float32))
 
+    # Ensure the unknown-word and number tokens have embeddings: random in [-0.04, 0.04) for __UNK__, zeros for __NUM__
+    if __UNK__ not in word_dict:
+        word_dict[__UNK__] = len(word_dict)
+        unk_random = 0.08 * np.random.random_sample(vector_size) - 0.04
+        embeddings.append(unk_random.astype(np.float32))
+    if __NUM__ not in word_dict:
+        word_dict[__NUM__] = len(word_dict)
+        embeddings.append(np.zeros(vector_size, dtype=np.float32))
+
     # Create a reverse word dict
     rev_word_dict = {}
     for word, id in word_dict.items():