scan for and save reply-to values and subjects
diff --git a/src/plugins/scanners/pipermail.py b/src/plugins/scanners/pipermail.py index 1e2583b..f780129 100644 --- a/src/plugins/scanners/pipermail.py +++ b/src/plugins/scanners/pipermail.py
@@ -89,12 +89,21 @@ posters = {} no_posters = 0 emails = 0 + senders = {} for message in messages: emails += 1 sender = message['from'] name = sender if not 'subject' in message or not message['subject'] or not 'from' in message or not message['from']: continue + + irt = message.get('in-reply-to', None) + if not irt and message.get('references'): + irt = (message.get('references').split() or [None])[-1] + replyto = None + if irt and irt in senders: + replyto = senders[irt] + print("This is a reply to %s" % replyto) raw_subject = re.sub(r"^[a-zA-Z]+\s*:\s*", "", message['subject'], count=10) raw_subject = re.sub(r"[\r\n\t]+", "", raw_subject, count=10) if not raw_subject in rawtopics: @@ -114,6 +123,7 @@ 'name': name, 'email': sender } + senders[message.get('message-id', "??")] = sender mdate = email.utils.parsedate_tz(message['date']) mdatestring = time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(email.utils.mktime_tz(mdate))) if not sender in knowns: @@ -134,6 +144,8 @@ 'sourceID': source['sourceID'], 'date': mdatestring, 'sender': sender, + 'replyto': replyto, + 'subject': message['subject'], 'address': sender, 'ts': email.utils.mktime_tz(mdate), 'id': message['message-id']
diff --git a/src/plugins/scanners/ponymail.py b/src/plugins/scanners/ponymail.py index 0908b28..cbb90cc 100644 --- a/src/plugins/scanners/ponymail.py +++ b/src/plugins/scanners/ponymail.py
@@ -51,7 +51,28 @@ kids += 1 kids += countSubs(child) return kids - + +def repliedTo(emails, struct): + myList = {} + for eml in struct: + myID = eml['tid'] + if 'children' in eml: + for child in eml['children']: + myList[child['tid']] = myID + if child.get('children'): + cList = repliedTo(emails, child['children']) + myList.update(cList) + return myList + +def getSender(email): + sender = email['from'] + name = sender + m = re.match(r"(.+)\s*<(.+)>", email['from'], flags=re.UNICODE) + if m: + name = m.group(1).replace('"', "").strip() + sender = m.group(2) + return sender + def scan(KibbleBit, source): # Validate URL first url = re.match(r"(https?://.+)/list\.html\?(.+)@(.+)", source['sourceURL']) @@ -68,8 +89,8 @@ # Pony Mail requires a UI cookie in order to work. Maked sure we have one! cookie = None - if 'auth' in source: - cookie = source['auth'].get('cookie', None) + if 'creds' in source and source['creds']: + cookie = source['creds'].get('cookie', None) if not cookie: KibbleBit.pprint("Pony Mail instance at %s requires an authorized cookie, none found! Bailing." 
% source['sourceURL']) source['steps']['mail'] = { @@ -137,6 +158,7 @@ else: KibbleBit.pprint("JSON was missing fields, aborting!") break + replyList = repliedTo(js['emails'], js['thread_struct']) topics = js['no_threads'] posters = {} no_posters = 0 @@ -191,7 +213,7 @@ sid = hashlib.sha1( ("%s%s" % (source['organisation'], sender)).encode('ascii', errors='replace')).hexdigest() if KibbleBit.exists('person',sid): knowns[sender] = True - if not sender in knowns: + if not sender in knowns or name != sender: KibbleBit.append('person', { 'upsert': True, @@ -201,6 +223,13 @@ 'id' :hashlib.sha1( ("%s%s" % (source['organisation'], sender)).encode('ascii', errors='replace')).hexdigest() }) knowns[sender] = True + replyTo = None + if email['id'] in replyList: + rt = replyList[email['id']] + for eml in js['emails']: + if eml['id'] == rt: + replyTo = getSender(eml) + print("Email was reply to %s" % replyTo) jse = { 'organisation': source['organisation'], 'sourceURL': source['sourceURL'], @@ -208,6 +237,8 @@ 'date': time.strftime("%Y/%m/%d %H:%M:%S", time.gmtime(email['epoch'])), 'sender': sender, 'address': sender, + 'subject': email['subject'], + 'replyto': replyTo, 'ts': email['epoch'], 'id': email['id'] }