Move class attributes and nltk word-list downloads into __init__; tighten scan_text minimum-word threshold from >= to > MINIMUM_NUMBER_OF_WORDS
diff --git a/spamfilter.py b/spamfilter.py
index c4af47a..c9b3f02 100644
--- a/spamfilter.py
+++ b/spamfilter.py
@@ -22,17 +22,10 @@
MINIMUM_NUMBER_OF_WORDS = 6 # We need at least SOME words to safely classify this
-nltk.download("stopwords")
-nltk.download("punkt")
class BayesScanner:
""" A very naïve spam scanner """
- stopwords = nltk.corpus.stopwords.words("english")
- punctuation = string.punctuation
- ham_words: typing.List[str] = []
- spam_words: typing.List[str] = []
-
def reload_spamdb(self):
""" This is how corpus/spamdb.json was built..."""
spamdb = requests.get(
@@ -60,6 +53,14 @@
f.close()
def __init__(self):
+ self.punctuation = string.punctuation
+ self.ham_words: typing.List[str] = []
+ self.spam_words: typing.List[str] = []
+
+ nltk.download("stopwords")
+ nltk.download("punkt")
+ self.stopwords = nltk.corpus.stopwords.words("english")
+
spamdb = json.load(open("corpus/spamdb.json"))
self.spam_words = spamdb["spam"]
self.ham_words = spamdb["ham"]
@@ -96,7 +97,7 @@
def scan_text(self, text: str):
text_processed = self.tokenify(text)
- if len(text_processed) >= MINIMUM_NUMBER_OF_WORDS:
+ if len(text_processed) > MINIMUM_NUMBER_OF_WORDS:
h, s = self.count_words(text_processed)
result = self.naive_result(h, s)
else: