blob: 43b1a9e3785e8ab0c03f9c3002941477cc7bbd94 [file]
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
########################################################################
# OPENAPI-URI: /api/code/relationships
########################################################################
# get:
# responses:
# '200':
# content:
# application/json:
# schema:
# $ref: '#/components/schemas/Sloc'
# description: 200 Response
# default:
# content:
# application/json:
# schema:
# $ref: '#/components/schemas/Error'
# description: unexpected error
# security:
# - cookieAuth: []
# summary: Shows a breakdown of contributor relationships between repositories
# post:
# requestBody:
# content:
# application/json:
# schema:
# $ref: '#/components/schemas/defaultWidgetArgs'
# responses:
# '200':
# content:
# application/json:
# schema:
# $ref: '#/components/schemas/Sloc'
# description: 200 Response
# default:
# content:
# application/json:
# schema:
# $ref: '#/components/schemas/Error'
# description: unexpected error
# security:
# - cookieAuth: []
# summary: Shows a breakdown of contributor relationships between repositories
#
########################################################################
"""
This is the committer relationship list renderer for Kibble
"""
import json
import time
import hashlib
import copy
import re
import math
def _repo_label(source_id, source_url, collapse):
    """Return the node label under which a source is shown in the chart.

    With a ``collapse`` regex, the label is capture group 1 of the first
    match against ``source_url`` (the source keeps its own ID when the regex
    does not match). Without one, the label is whatever follows the last
    ``/`` of the URL.
    """
    if collapse:
        match = re.search(collapse, source_url)
        return match.group(1) if match else source_id
    return re.sub(r"^.+/", "", source_url)


def run(API, environ, indata, session):
    """Render the contributor-relationship graph between repositories.

    Repositories (optionally collapsed into groups via a regex) become nodes;
    two nodes are linked when they share at least ``links`` contributors in
    the selected period.

    Args:
        API: object exposing ``exception(code, message)``, raised on errors.
        environ: request environment; not used by this endpoint.
        indata: request arguments (dict). Keys read here:
            ``view`` / ``subfilter`` - restrict to the sources of a view;
            ``to`` / ``from`` - period bounds matched against ``tsday``
            (default: the six months up to now);
            ``author`` - if truthy, use ``author_email`` rather than
            ``committer_email``;
            ``source`` - restrict to one source ID;
            ``email`` - restrict to one contributor;
            ``links`` - minimum shared contributors for a link (default 1);
            ``collapse`` - regex whose group 1 names the node a source
            belongs to;
            ``interval`` - echoed back in the response (default ``"day"``).
        session: current session; must provide ``user``, ``DB.ES``,
            ``DB.dbname`` and, when views are used, ``getView`` / ``subFilter``.

    Yields:
        A single JSON string with ``nodes``, ``links``, ``maxLinks``,
        ``maxShared``, ``widgetType``, ``interval``, ``okay`` and
        ``responseTime``.

    Raises:
        API.exception: 403 if no user is logged in (raised on first
            iteration, since this function is a generator).
    """
    # We need to be logged in for this!
    if not session.user:
        raise API.exception(403, "You must be logged in to use this API endpoint!")

    now = time.time()

    # First, fetch the view if we have such a thing enabled
    viewList = []
    if indata.get("view"):
        viewList = session.getView(indata.get("view"))
    if indata.get("subfilter"):
        viewList = session.subFilter(indata.get("subfilter"), view=viewList)

    dateTo = indata.get("to", int(time.time()))
    dateFrom = indata.get("from", dateTo - (86400 * 30 * 6))  # Default to a 6 month span

    contributor_field = "author_email" if indata.get("author") else "committer_email"
    interval = indata.get("interval", "day")
    dOrg = session.user["defaultOrganisation"] or "apache"
    es = session.DB.ES
    dbname = session.DB.dbname

    # Filters shared by every commit query below
    must = [
        {"range": {"tsday": {"from": dateFrom, "to": dateTo}}},
        {"term": {"organisation": dOrg}},
    ]
    # Source-specific or view-specific??
    if indata.get("source"):
        must.append({"term": {"sourceID": indata.get("source")}})
    elif viewList:
        must.append({"terms": {"sourceID": viewList}})
    if indata.get("email"):
        must.append({"term": {contributor_field: indata.get("email")}})

    # Get number of commits, this period, per repo
    res = es.search(
        index=dbname,
        doc_type="code_commit",
        size=0,
        body={
            "query": {"bool": {"must": must}},
            "aggs": {"per_repo": {"terms": {"field": "sourceID", "size": 10000}}},
        },
    )

    # For each repo, count commits and gather the unique authors/committers.
    # The aggregation is the same for every repo, so it is built only once.
    contributor_aggs = {
        "per_contributor": {"terms": {"field": contributor_field, "size": 10000}}
    }
    repos = {}  # sourceID -> list of contributor emails
    repo_commits = {}  # sourceID (and later node label) -> commit count
    for bucket in res["aggregations"]["per_repo"]["buckets"]:
        sourceID = bucket["key"]
        xres = es.search(
            index=dbname,
            doc_type="code_commit",
            size=0,
            body={
                "query": {
                    "bool": {"must": must + [{"term": {"sourceID": sourceID}}]}
                },
                "aggs": contributor_aggs,
            },
        )
        repos[sourceID] = [
            person["key"]
            for person in xres["aggregations"]["per_contributor"]["buckets"]
        ]
        repo_commits[sourceID] = bucket["doc_count"]

    # Grab data of all sources; commits whose source document is gone are ignored
    repodatas = {}
    for ID in repos:
        if es.exists(index=dbname, doc_type="source", id=ID):
            repodatas[ID] = es.get(index=dbname, doc_type="source", id=ID)

    # Resolve every source's node label once instead of once per pair
    collapse = indata.get("collapse")
    labels = {
        ID: _repo_label(ID, doc["_source"]["sourceURL"], collapse)
        for ID, doc in repodatas.items()
    }
    author_sets = {ID: set(repos[ID]) for ID in repodatas}

    # Now, figure out which repos share the same contributors
    minLinks = indata.get("links", 1)
    # Keyed by (label, other_label) tuples so labels may safely contain any
    # character, including "@".
    repo_links = {}
    repo_notoriety = {}  # label -> set of labels it links to
    repo_authors = {}  # label -> set of contributor emails
    max_links = 0
    max_shared = 0
    max_authors = 0
    for ID, label in labels.items():
        own_authors = author_sets[ID]
        linked = set()
        for xID, xlabel in labels.items():
            if xlabel == label:
                continue
            shared = sum(1 for author in repos[xID] if author in own_authors)
            # Each pair is recorded in one direction only
            if shared >= minLinks and (xlabel, label) not in repo_links:
                linked.add(xlabel)
                key = (label, xlabel)
                # How many contributors in common between project A and B?
                repo_links[key] = repo_links.get(key, 0) + shared
                max_shared = max(max_shared, repo_links[key])
        # How many projects is this repo connected to?
        repo_notoriety.setdefault(label, set()).update(linked)
        # Which contributors does this node have in total?
        repo_authors.setdefault(label, set()).update(repos[ID])
        if label != ID:
            # Several sources may fold into one node; add up their commits
            repo_commits[label] = repo_commits.get(label, 0) + repo_commits[ID]
        max_links = max(max_links, len(repo_notoriety[label]))
        # Used for calculating max sphere size in charts
        max_authors = max(max_authors, len(repo_authors[label]))

    # Count, per node, the links it takes part in (either end)
    link_counts = {}
    for fr, to in repo_links:
        link_counts[fr] = link_counts.get(fr, 0) + 1
        link_counts[to] = link_counts.get(to, 0) + 1

    # Now, pull it all together!
    nodes = []
    for label in repo_notoriety:
        lsize = link_counts.get(label, 0)
        asize = len(repo_authors[label])
        if asize and max_authors:
            size = max(5, (1 - abs(math.log10(asize / max_authors))) * 45)
        else:
            # No contributors known: log10(0) is undefined, use the minimum size
            size = 5
        nodes.append(
            {
                "id": label,
                "name": label,
                "commits": repo_commits[label],
                "authors": asize,
                "links": lsize,
                "size": size,
                "tooltip": "%u connections, %u contributors, %u commits"
                % (lsize, asize, repo_commits[label]),
            }
        )

    links = []
    for (fr, to), shared in repo_links.items():
        if fr in repo_notoriety and to in repo_notoriety:
            links.append(
                {
                    "source": fr,
                    "target": to,
                    # max_shared is 0 only when every link shares nobody
                    # (possible with links=0); fall back to minimum thickness
                    "value": max(1, (shared / max_shared) * 8) if max_shared else 1,
                    "name": "%s ↔ %s" % (fr, to),
                    "tooltip": "%u committers in common" % shared,
                }
            )

    JSON_OUT = {
        "maxLinks": max_links,
        "maxShared": max_shared,
        "widgetType": {"chartType": "link"},  # Recommendation for the UI
        "links": links,
        "nodes": nodes,
        "interval": interval,
        "okay": True,
        "responseTime": time.time() - now,
    }
    yield json.dumps(JSON_OUT)