blob: f2134c493fde2307e8ffdfcc55bbc3c7ff56d6a6 [file] [log] [blame]
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Simple endpoint that returns the server's gathered activity data"""
""" THIS ONLY DEALS WITH PUBLIC EMAILS FOR NOW - AAA IS BEING WORKED ON"""
import plugins.server
import plugins.session
import plugins.messages
import plugins.defuzzer
import plugins.offloader
import re
import email.utils
import typing
import aiohttp.web
import time
async def process(
    server: plugins.server.BaseServer, session: plugins.session.SessionObject, indata: dict,
) -> typing.Union[dict, aiohttp.web.Response]:
    """Return the gathered activity data for one mailing list.

    Request parameters read from ``indata``:

    * ``list`` and ``domain`` (both required): identify the mailing list.
    * ``since`` (optional): an epoch value. If no message newer than this
      matches the query, ``{"changed": False}`` is returned instead of the
      full payload. A present-but-empty value means "now".
    * ``quick`` (optional, presence only): return statistics only; each
      email entry is reduced to its ``epoch`` field, and no thread
      structure, participants or word cloud are produced.
    * ``emailsOnly`` (optional, presence only): return email summaries but
      omit the thread structure, top-10 participants and word cloud.

    The whole of ``indata`` is also passed to the defuzzer to build the
    search query, and is echoed back as ``searchParams``.

    Returns:
        * an empty JSON object response when ``list`` or ``domain`` is missing;
        * a 400 text/plain response when the defuzzer raises ``ValueError``
          or when ``since`` cannot be converted to an integer;
        * a 500 text/plain response when the defuzzer raises ``AssertionError``;
        * ``{"changed": False}`` when ``since`` was given and nothing newer
          was found;
        * otherwise a dict with the statistics, email summaries and (unless
          suppressed) ``thread_struct`` and ``cloud``.
    """
    # must provide list and domain
    xlist = indata.get("list", None)
    xdomain = indata.get("domain", None)
    if not xlist or not xdomain:
        return aiohttp.web.Response(headers={"content-type": "application/json",}, text='{}')

    try:
        query_defuzzed = plugins.defuzzer.defuzz(indata)
        query_defuzzed_nodate = plugins.defuzzer.defuzz(indata, nodate=True)
    except ValueError as ve:  # If defuzzer encounters syntax errors, it will throw a ValueError
        return aiohttp.web.Response(headers={"content-type": "text/plain",}, status=400, text=str(ve))
    except AssertionError as ae:  # If defuzzer encounters internal errors, it will throw an AssertionError
        return aiohttp.web.Response(headers={"content-type": "text/plain",}, status=500, text=str(ae))

    # get a filter for use with get_activity_span (no date)
    # It can also be used with dated queries
    query_filter = await plugins.messages.get_accessible_filter(session, query_defuzzed_nodate)
    if query_filter:
        query_defuzzed['filter'] = query_filter
        query_defuzzed_nodate['filter'] = query_filter

    # since: check if there have been recent updates to the data
    if 'since' in indata:
        since = indata.get('since', None)
        try:
            epoch = int(since) if since else int(time.time())
        except (TypeError, ValueError):
            # Report a malformed client value the same way as a defuzzer syntax error
            return aiohttp.web.Response(
                headers={"content-type": "text/plain",},
                status=400,
                text="Invalid 'since' value: expected an integer epoch timestamp",
            )
        # dict.copy() is shallow, so appending to the copied query's 'must'
        # list would also add the range clause to query_defuzzed and thereby
        # restrict the main query below. Build a separate list instead.
        query_since = query_defuzzed.copy()
        query_since['must'] = [
            *query_defuzzed['must'],
            {"range": {"epoch": {"gt": epoch}}},
        ]
        newer = await plugins.messages.query(
            session, query_since, query_limit=1, source_fields=[]  # don't need any fields
        )
        if not newer:
            return {"changed": False}

    # statsOnly: Whether to only send statistical info (for n-grams etc), and not the
    # thread struct and message bodies
    # Param: quick
    statsOnly = 'quick' in indata
    # emailsOnly: return email summaries only, not derived data:
    # i.e. omit thread_struct, top 10 participants and word-cloud
    emailsOnly = 'emailsOnly' in indata
    want_derived = not statsOnly and not emailsOnly

    # In stats-only mode only the epoch of each message is requested
    source_fields = ['epoch'] if statsOnly else None
    results = await plugins.messages.query(
        session, query_defuzzed, query_limit=server.config.database.max_hits, source_fields=source_fields
    )

    wordcloud = None
    if server.config.ui.wordcloud and want_derived:
        wordcloud = await plugins.messages.wordcloud(session, query_defuzzed)

    # The activity span uses the date-less query
    oldest, youngest, active_months = await plugins.messages.get_activity_span(session, query_defuzzed_nodate)

    authors = {}
    tstruct = {}
    top10_authors = None
    if want_derived:
        threads = plugins.messages.ThreadConstructor(results)
        tstruct, authors = await server.runners.run(threads.construct)
        # author entries are now [count, gravatar]
        # as we cannot reconstruct the correct gravatar from an anonymised address
        all_authors = sorted(authors.items(), key=lambda x: x[1][0], reverse=True)  # sort in reverse by author count
        top10_authors = []
        for author, data in all_authors[:10]:
            name, address = email.utils.parseaddr(author)
            top10_authors.append(
                {"email": address, "name": name, "count": data[0], "gravatar": data[1]}
            )

    # Trim email data so as to reduce download sizes
    if statsOnly:
        for msg in results:
            # Snapshot the keys first: the dict is modified while being walked
            for header in [key for key in msg if key != 'epoch']:
                del msg[header]
    else:
        for msg in results:
            plugins.messages.trim_email(msg, external=True)

    output = {
        "firstYear": oldest.year,
        "lastYear": youngest.year,
        "firstMonth": oldest.month,
        "lastMonth": youngest.month,
        "active_months": active_months,
        "hits": len(results),
        "numparts": len(authors),
        "no_threads": len(tstruct),
        "emails": sorted(results, key=lambda x: x["epoch"]),
        "participants": top10_authors or {},
        "searchlist": f"<{xlist}.{xdomain}>",
        "domain": xdomain,
        "name": xlist,
        "list": f"{xlist}@{xdomain}",
        "searchParams": indata,
        "unixtime": int(time.time()),
    }
    if want_derived:
        output['thread_struct'] = tstruct
    if wordcloud:
        output['cloud'] = wordcloud
    return output
def register(server: plugins.server.BaseServer):
    """Wrap ``process`` in a ``plugins.server.Endpoint`` and return it.

    The ``server`` argument is accepted but not used here.
    """
    endpoint = plugins.server.Endpoint(process)
    return endpoint