blob: ba188ea1be930c2d73c0409d792cbbb9e1a97f0f [file]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
########################################################################
# OPENAPI-URI: /api/mail/map
########################################################################
# get:
# responses:
# '200':
# content:
# application/json:
# schema:
# $ref: '#/components/schemas/Sloc'
# description: 200 Response
# default:
# content:
# application/json:
# schema:
# $ref: '#/components/schemas/Error'
# description: unexpected error
# security:
# - cookieAuth: []
# summary: Shows a breakdown of email author reply mappings
# post:
# requestBody:
# content:
# application/json:
# schema:
# $ref: '#/components/schemas/defaultWidgetArgs'
# responses:
# '200':
# content:
# application/json:
# schema:
# $ref: '#/components/schemas/Sloc'
# description: 200 Response
# default:
# content:
# application/json:
# schema:
# $ref: '#/components/schemas/Error'
# description: unexpected error
# security:
# - cookieAuth: []
# summary: Shows a breakdown of email author reply mappings
#
########################################################################
"""
This is the committer relationship list renderer for Kibble
"""
import json
import time
import hashlib
import copy
import re
import math
# Regex matched against sender/reply-to values to filter out known bots and
# automated mailers (issue trackers, CI systems, review tools, list aliases)
# so they don't show up as "authors" in the reply map.
badBots = r"(JIRA|Hudson|jira|jenkins|GitHub|git@|dev@|bugzilla|gerrit)"
def run(API, environ, indata, session):
    """
    Render a node/link graph of which email authors reply to whom.

    Serves GET/POST /api/mail/map. Yields a single JSON document with
    ``nodes`` (one per author, sized by reply volume) and ``links``
    (one per reply relationship, weighted by shared correspondents),
    suitable for a link chart in the UI.

    :param API: plugin API object; used for raising HTTP-style exceptions
    :param environ: WSGI environment (unused here)
    :param indata: request parameters (view, subfilter, from/to, source,
                   search, email, author, links, collapse)
    :param session: user session, carries the ES connection and view helpers
    :raises API.exception: 403 when the caller is not logged in
    """
    # We need to be logged in for this!
    if not session.user:
        raise API.exception(403, "You must be logged in to use this API endpoint!")

    now = time.time()

    # First, fetch the view if we have such a thing enabled
    viewList = []
    if indata.get("view"):
        viewList = session.getView(indata.get("view"))
    if indata.get("subfilter"):
        viewList = session.subFilter(indata.get("subfilter"), view=viewList)

    dateTo = indata.get("to", int(time.time()))
    dateFrom = indata.get(
        "from", dateTo - (86400 * 30 * 6)
    )  # Default to a 6 month span
    span = dateTo - dateFrom

    ####################################################################
    ####################################################################
    dOrg = session.user["defaultOrganisation"] or "apache"
    query = {
        "query": {
            "bool": {
                "must": [
                    {"range": {"ts": {"from": dateFrom, "to": dateTo}}},
                    {"term": {"organisation": dOrg}},
                ]
            }
        }
    }
    # Source-specific or view-specific??
    if indata.get("source"):
        query["query"]["bool"]["must"].append(
            {"term": {"sourceID": indata.get("source")}}
        )
    elif viewList:
        query["query"]["bool"]["must"].append({"terms": {"sourceID": viewList}})
    if indata.get("search"):
        query["query"]["bool"]["must"].append(
            {"regexp": {"subject": indata.get("search")}}
        )
    if indata.get("email"):
        query["query"]["bool"]["minimum_should_match"] = 1
        query["query"]["bool"]["should"] = [
            {"term": {"replyto.keyword": indata.get("email")}},
            {"term": {"sender": indata.get("email")}},
        ]

    # Count emails, this period, per author (replyto by default, or the
    # sender side when ?author is set)
    query["aggs"] = {
        "per_ml": {
            "terms": {
                "field": "replyto.keyword" if not indata.get("author") else "sender",
                "size": 150,
            }
        }
    }
    res = session.DB.ES.search(
        index=session.DB.dbname, doc_type="email", size=0, body=query
    )

    repos = {}  # author -> list of people they correspond with
    repo_commits = {}  # author -> reply/email count
    max_emails = 0
    max_links = 0
    max_shared = 0
    minLinks = indata.get("links", 1)

    # The should/minimum_should_match clauses were only needed for the
    # top-level aggregation; drop them before the per-author sub-queries.
    if indata.get("email"):
        del query["query"]["bool"]["should"]
        del query["query"]["bool"]["minimum_should_match"]

    # For each author found, count emails and gather their correspondents
    for doc in res["aggregations"]["per_ml"]["buckets"]:
        sourceID = doc["key"]
        emails = doc["doc_count"]
        if re.search(badBots, sourceID):  # No bots
            continue
        if emails > (span / 86400) * 4:  # More than 4/day and we consider you a bot!
            continue
        # Gather the unique correspondents (the opposite field of the
        # top-level aggregation)
        query["aggs"] = {
            "per_ml": {
                "terms": {
                    "field": "sender"
                    if not indata.get("author")
                    else "replyto.keyword",
                    "size": 5000,
                }
            }
        }
        xquery = copy.deepcopy(query)
        xquery["query"]["bool"]["must"].append(
            {
                "term": {
                    "replyto.keyword"
                    if not indata.get("author")
                    else "sender": sourceID
                }
            }
        )
        xres = session.DB.ES.search(
            index=session.DB.dbname, doc_type="email", size=0, body=xquery
        )
        authors = [person["key"] for person in xres["aggregations"]["per_ml"]["buckets"]]
        if emails > max_emails:
            max_emails = emails
        repos[sourceID] = authors
        repo_commits[sourceID] = emails

    # Now, figure out which authors share the same correspondents
    repo_links = {}  # "A||B" -> number of shared correspondents
    repo_notoriety = {}  # author -> set of connected authors
    repodatas = {}  # author -> person profile document from ES
    repo_authors = {}  # author -> set of unique correspondents

    # Grab the person profile of every author we found (keyed by a
    # sha1 of org + address, as written by the scanners)
    for ID, repo in repos.items():
        hID = hashlib.sha1(
            ("%s%s" % (dOrg, ID)).encode("ascii", errors="replace")
        ).hexdigest()
        if not session.DB.ES.exists(index=session.DB.dbname, doc_type="person", id=hID):
            continue
        repodatas[ID] = session.DB.ES.get(
            index=session.DB.dbname, doc_type="person", id=hID
        )

    for ID, repo in repos.items():
        mylinks = {}
        if ID not in repodatas:
            continue
        repodata = repodatas[ID]
        oID = ID
        # ?collapse=regex folds several addresses into one node via the
        # first capture group matched against the profile email
        if indata.get("collapse"):
            m = re.search(indata.get("collapse"), repodata["_source"]["email"])
            if m:
                ID = m.group(1)
        xlinks = []
        for xID, xrepo in repos.items():
            if xID in repodatas:
                xrepodata = repodatas[xID]
                if indata.get("collapse"):
                    m = re.search(indata.get("collapse"), xrepodata["_source"]["email"])
                    if m:
                        xID = m.group(1)
                if xID != ID:
                    if ID in xrepo:
                        xlinks.append(xID)
                    lname = "%s||%s" % (ID, xID)  # Link name
                    rname = "%s||%s" % (xID, ID)  # Reverse link name
                    if (
                        len(xlinks) > 0
                        and rname not in repo_links
                        and len(xlinks) >= minLinks
                    ):
                        mylinks[ID] = mylinks.get(ID, 0) + 1
                        repo_links[lname] = repo_links.get(lname, 0) + len(
                            xlinks
                        )  # How many correspondents in common between author A and B?
                        if repo_links[lname] > max_shared:
                            max_shared = repo_links[lname]
                    elif rname in repo_links:
                        repo_links[rname] = repo_links.get(rname, 0) + len(xlinks)
        if ID not in repo_notoriety:
            repo_notoriety[ID] = set()
        repo_notoriety[ID].update(
            mylinks.keys()
        )  # How many other authors is this author connected to?
        if ID not in repo_authors:
            repo_authors[ID] = set()
        repo_authors[ID].update(repo)  # Unique correspondents for this author
        # When collapsed, fold the original node's counts into the
        # collapsed one
        if ID != oID:
            repo_commits[ID] = repo_commits.get(ID, 0) + repo_commits[oID]
        if repo_commits[ID] > max_emails:
            max_emails = repo_commits[ID]  # Used for calculating max link thickness
        if len(repo_notoriety[ID]) > max_links:
            max_links = len(repo_notoriety[ID])

    # Now, pull it all together!
    nodes = []
    links = []
    existing_repos = []
    for sourceID, ns in repo_notoriety.items():
        lsize = 0
        for k in repo_links.keys():
            fr, to = k.split("||")
            if fr == sourceID or to == sourceID:
                lsize += 1
        asize = len(repo_authors[sourceID])
        doc = {
            "id": sourceID,
            "gravatar": hashlib.md5(sourceID.lower().encode("utf-8")).hexdigest(),
            "name": repodatas[sourceID]["_source"].get("name", sourceID),
            "replies": repo_commits[sourceID],
            "authors": asize,
            "links": lsize,
            # Log-scaled sphere size, clamped to a minimum of 5
            "size": max(
                5, (1 - abs(math.log10(repo_commits[sourceID] / max_emails))) * 45
            ),
            "tooltip": "%u connections, %u fellows, %u replies to"
            % (lsize, asize, repo_commits[sourceID]),
        }
        nodes.append(doc)
        existing_repos.append(sourceID)
    # Only emit links whose both endpoints survived as nodes
    for k, s in repo_links.items():
        size = s
        fr, to = k.split("||")
        if fr in existing_repos and to in existing_repos:
            doc = {
                "source": fr,
                "target": to,
                "value": max(1, (size / max_shared) * 5),
                "name": "%s ↔ %s" % (fr, to),
                "tooltip": "%u topics exchanged" % size,
            }
            links.append(doc)
    JSON_OUT = {
        "maxLinks": max_links,
        "maxShared": max_shared,
        "widgetType": {"chartType": "link"},  # Recommendation for the UI
        "links": links,
        "nodes": nodes,
        "okay": True,
        "responseTime": time.time() - now,
    }
    yield json.dumps(JSON_OUT)