| # Licensed to the Apache Software Foundation (ASF) under one |
| # or more contributor license agreements. See the NOTICE file |
| # distributed with this work for additional information |
| # regarding copyright ownership. The ASF licenses this file |
| # to you under the Apache License, Version 2.0 (the |
| # "License"); you may not use this file except in compliance |
| # with the License. You may obtain a copy of the License at |
| # |
| # http://www.apache.org/licenses/LICENSE-2.0 |
| # |
| # Unless required by applicable law or agreed to in writing, |
| # software distributed under the License is distributed on an |
| # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| # KIND, either express or implied. See the License for the |
| # specific language governing permissions and limitations |
| # under the License. |
| |
| ######################################################################## |
| # OPENAPI-URI: /api/mail/map |
| ######################################################################## |
| # get: |
| # responses: |
| # '200': |
| # content: |
| # application/json: |
| # schema: |
| # $ref: '#/components/schemas/Sloc' |
| # description: 200 Response |
| # default: |
| # content: |
| # application/json: |
| # schema: |
| # $ref: '#/components/schemas/Error' |
| # description: unexpected error |
| # security: |
| # - cookieAuth: [] |
| # summary: Shows a breakdown of email author reply mappings |
| # post: |
| # requestBody: |
| # content: |
| # application/json: |
| # schema: |
| # $ref: '#/components/schemas/defaultWidgetArgs' |
| # responses: |
| # '200': |
| # content: |
| # application/json: |
| # schema: |
| # $ref: '#/components/schemas/Sloc' |
| # description: 200 Response |
| # default: |
| # content: |
| # application/json: |
| # schema: |
| # $ref: '#/components/schemas/Error' |
| # description: unexpected error |
| # security: |
| # - cookieAuth: [] |
| # summary: Shows a breakdown of email author reply mappings |
| # |
| ######################################################################## |
| |
| |
| """ |
| This is the email author reply-mapping renderer for Kibble |
| """ |
| |
| import json |
| import time |
| import hashlib |
| import copy |
| import re |
| import math |
| |
# Regular expression matched (via re.search) against each aggregated address
# in run(); any address containing one of these fragments is treated as an
# automated sender and left out of the map.
badBots = r"(JIRA|Hudson|jira|jenkins|GitHub|git@|dev@|bugzilla|gerrit)"
| |
| |
def _collapse_id(pattern, person, default):
    """Return the node ID to use for *person*.

    When *pattern* is set and matches the person document's ``email`` field,
    the first capture group of the match becomes the node ID. In every other
    case *default* is returned unchanged.
    """
    if pattern:
        match = re.search(pattern, person["_source"]["email"])
        if match:
            return match.group(1)
    return default


def run(API, environ, indata, session):
    """Yield the email reply map for the user's organisation as a JSON string.

    This is a generator: nothing (including the login check) runs until it is
    iterated.

    Optional keys read from *indata*:
      view, subfilter -- restrict the sources via session.getView/subFilter
      from, to        -- time range for the ``ts`` field; defaults to the six
                         months (180 days) leading up to now
      source          -- restrict to a single sourceID (wins over the view)
      search          -- regexp applied to the ``subject`` field
      email           -- only emails where this address is replyto or sender
      author          -- when truthy, aggregate on ``sender`` first and on
                         ``replyto.keyword`` second (the reverse of the default)
      links           -- minimum accumulated link count before a new link is
                         recorded (default 1)
      collapse        -- regexp whose first capture group, matched against a
                         person's email, is used as the node ID

    Yields one JSON document with the keys ``nodes``, ``links``, ``maxLinks``,
    ``maxShared``, ``widgetType``, ``okay`` and ``responseTime``.

    Raises API.exception(403, ...) when session.user is falsy.
    """
    # We need to be logged in for this!
    if not session.user:
        raise API.exception(403, "You must be logged in to use this API endpoint!")

    now = time.time()

    # First, fetch the view if we have such a thing enabled
    viewList = []
    if indata.get("view"):
        viewList = session.getView(indata.get("view"))
    if indata.get("subfilter"):
        viewList = session.subFilter(indata.get("subfilter"), view=viewList)

    dateTo = indata.get("to", int(time.time()))
    dateFrom = indata.get("from", dateTo - (86400 * 30 * 6))  # Default to a 6 month span
    span = dateTo - dateFrom

    # Which field identifies a node, and which field lists its counterparts
    by_author = bool(indata.get("author"))
    key_field = "sender" if by_author else "replyto.keyword"
    peer_field = "replyto.keyword" if by_author else "sender"
    collapse = indata.get("collapse")
    minLinks = indata.get("links", 1)

    dOrg = session.user["defaultOrganisation"] or "apache"
    bool_query = {
        "must": [
            {"range": {"ts": {"from": dateFrom, "to": dateTo}}},
            {"term": {"organisation": dOrg}},
        ]
    }
    query = {"query": {"bool": bool_query}}

    # Source-specific or view-specific??
    if indata.get("source"):
        bool_query["must"].append({"term": {"sourceID": indata.get("source")}})
    elif viewList:
        bool_query["must"].append({"terms": {"sourceID": viewList}})
    if indata.get("search"):
        bool_query["must"].append({"regexp": {"subject": indata.get("search")}})

    if indata.get("email"):
        bool_query["minimum_should_match"] = 1
        bool_query["should"] = [
            {"term": {"replyto.keyword": indata.get("email")}},
            {"term": {"sender": indata.get("email")}},
        ]

    # Get the number of emails, this period, per address
    query["aggs"] = {"per_ml": {"terms": {"field": key_field, "size": 150}}}
    res = session.DB.ES.search(
        index=session.DB.dbname, doc_type="email", size=0, body=query
    )

    # The per-address follow-up queries below run without the email filter
    bool_query.pop("should", None)
    bool_query.pop("minimum_should_match", None)
    # ... and aggregate on the counterpart field instead (same for every address)
    query["aggs"] = {"per_ml": {"terms": {"field": peer_field, "size": 5000}}}

    contacts = {}  # address -> list of counterpart addresses
    email_counts = {}  # address (or collapsed ID) -> number of emails
    max_emails = 0
    bot_threshold = (span / 86400) * 4

    # For each address, count emails and gather its counterparts
    for bucket in res["aggregations"]["per_ml"]["buckets"]:
        address = bucket["key"]
        emails = bucket["doc_count"]
        if re.search(badBots, address):  # No bots
            continue
        if emails > bot_threshold:  # More than 4/day and we consider you a bot!
            continue

        xquery = copy.deepcopy(query)
        xquery["query"]["bool"]["must"].append({"term": {key_field: address}})
        xres = session.DB.ES.search(
            index=session.DB.dbname, doc_type="email", size=0, body=xquery
        )
        contacts[address] = [
            person["key"] for person in xres["aggregations"]["per_ml"]["buckets"]
        ]
        email_counts[address] = emails
        max_emails = max(max_emails, emails)

    # Grab the person document of every address that has one
    people = {}
    for address in contacts:
        hID = hashlib.sha1(
            ("%s%s" % (dOrg, address)).encode("ascii", errors="replace")
        ).hexdigest()
        if session.DB.ES.exists(index=session.DB.dbname, doc_type="person", id=hID):
            people[address] = session.DB.ES.get(
                index=session.DB.dbname, doc_type="person", id=hID
            )

    # Now, figure out which nodes are connected to each other
    pair_links = {}  # (from ID, to ID) -> accumulated link count
    notoriety = {}  # node ID -> set of IDs recorded as linked
    node_authors = {}  # node ID -> set of counterpart addresses
    max_links = 0
    max_shared = 0

    for address, counterparts in contacts.items():
        if address not in people:
            continue
        ID = _collapse_id(collapse, people[address], address)
        linked = set()
        # NOTE(review): xlinks keeps growing across all counterparts of this
        # node, so later pairs receive larger counts. Kept as in the original.
        xlinks = []
        for other, other_counterparts in contacts.items():
            if other not in people:
                continue
            xID = _collapse_id(collapse, people[other], other)
            if xID == ID or ID not in other_counterparts:
                continue
            xlinks.append(xID)
            pair, reverse = (ID, xID), (xID, ID)
            if reverse in pair_links:
                # The opposite direction already exists: fold into that link
                pair_links[reverse] += len(xlinks)
            elif len(xlinks) >= minLinks:
                # NOTE(review): the original records ID itself (not xID) here,
                # which caps maxLinks at 1 - confirm whether that is intended.
                linked.add(ID)
                pair_links[pair] = pair_links.get(pair, 0) + len(xlinks)
                max_shared = max(max_shared, pair_links[pair])

        notoriety.setdefault(ID, set()).update(linked)
        node_authors.setdefault(ID, set()).update(counterparts)

        if ID != address:
            # Collapsed node: accumulate the email counts of all its members
            email_counts[ID] = email_counts.get(ID, 0) + email_counts[address]
            max_emails = max(max_emails, email_counts[ID])
        max_links = max(max_links, len(notoriety[ID]))

    # Number of links touching each node, counted once up front
    link_counts = {}
    for pair in pair_links:
        for end in set(pair):
            link_counts[end] = link_counts.get(end, 0) + 1

    # Now, pull it all together!
    nodes = []
    for sourceID in notoriety:
        lsize = link_counts.get(sourceID, 0)
        asize = len(node_authors[sourceID])
        replies = email_counts[sourceID]
        # A collapsed ID has no person document of its own; fall back to the ID
        person = people.get(sourceID)
        name = person["_source"].get("name", sourceID) if person else sourceID
        nodes.append(
            {
                "id": sourceID,
                "gravatar": hashlib.md5(sourceID.lower().encode("utf-8")).hexdigest(),
                "name": name,
                "replies": replies,
                "authors": asize,
                "links": lsize,
                "size": max(5, (1 - abs(math.log10(replies / max_emails))) * 45),
                "tooltip": "%u connections, %u fellows, %u replies to"
                % (lsize, asize, replies),
            }
        )

    links = []
    for (fr, to), size in pair_links.items():
        # Only emit links whose two ends both made it into the node list
        if fr in notoriety and to in notoriety:
            links.append(
                {
                    "source": fr,
                    "target": to,
                    "value": max(1, (size / max_shared) * 5),
                    "name": "%s ↔ %s" % (fr, to),
                    "tooltip": "%u topics exchanged" % size,
                }
            )

    JSON_OUT = {
        "maxLinks": max_links,
        "maxShared": max_shared,
        "widgetType": {"chartType": "link"},  # Recommendation for the UI
        "links": links,
        "nodes": nodes,
        "okay": True,
        "responseTime": time.time() - now,
    }
    yield json.dumps(JSON_OUT)