blob: ae2aadbe7580213c953baf672e1262d88061c08b [file] [log] [blame]
#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Assign shepherds to podlings for an upcoming report cycle.
This script can only be run from a checkout of the Incubator's Subversion
repository. Assignments are written out to
`content/shepherd_assignments.json`; once this script has been run, the
modified file must be committed.
The roster of active shepherds is maintained in the file
`content/shepherds.json`.
Rules by which shepherds are assigned:
* Shepherds must not be Mentors for the podling.
* Shepherds will be assigned a maximum of 3 podlings per cycle, or fewer if
they choose. If there are not enough shepherds to meet demand, some
podlings will not receive shepherd assignments.
* If possible, no shepherd should be assigned the same podling multiple
times over the course of incubation.
* Shepherds may specify a whitelist of podlings which they will accept.
"""
import sys
# Refuse to run on interpreters older than the minimum supported release.
if sys.version_info[:2] < (3, 2):
    raise Exception("Python 3.2 or above is required")
import os
import re
import json
import pickle
import random
import datetime
import argparse
import xml.dom.minidom
from pprint import pprint
class Shepherd(object):
    """
    An Incubator Shepherd.

    Keeps the shepherd's capacity, optional podling whitelist, and the
    podlings assigned to the shepherd for each report date.
    """

    def __init__(self, apache_id, name=None, max_podlings=3, whitelist=None):
        """
        Return a Shepherd instance.

        * apache_id -- The Shepherd's apache id.
        * name -- Optional name; stored on the instance.
        * max_podlings -- Maximum podlings to review per month.
        * whitelist -- An optional list of acceptable podling IDs.  An
          empty or missing list means "no whitelist".
        """
        self._apache_id = apache_id
        self._max_podlings = int(max_podlings)
        self._name = name
        self._whitelist = set(whitelist) if whitelist else None
        # Maps a report date to the set of podling IDs assigned for it.
        self._assignments = {}

    def accept(self, date, podling_id):
        """Indicate whether the proposed assignment is acceptable."""
        return self._do_accept(date, podling_id, throw=False)

    def _do_accept(self, date, podling_id, throw):
        """
        Check the whitelist and the per-date capacity for an assignment.

        Return True when acceptable.  Otherwise return False, or raise
        ValueError when `throw` is true.
        """
        # If this shepherd has a whitelist, ensure that the podling is in it.
        if self._whitelist and podling_id not in self._whitelist:
            if throw:
                raise ValueError("Podling not in whitelist")
            return False

        # Ensure that the shepherd has capacity to accept an assignment.
        if self.podling_count(date) >= self._max_podlings:
            if throw:
                raise ValueError("Too many podling assignments this month")
            return False

        return True

    def assign(self, date, podling_id):
        """
        Attempt to assign a podling to the Shepherd for a specific report
        date.  Raise ValueError if the assignment is not acceptable.
        """
        self._do_accept(date=date, podling_id=podling_id, throw=True)
        self.force_assign(date, podling_id)

    def force_assign(self, date, podling_id):
        """Assign a podling to the shepherd.  Always succeeds."""
        self._assignments.setdefault(date, set()).add(podling_id)

    def podling_count(self, date):
        """
        Return the number of podlings that the Shepherd has been assigned
        for the given report date.
        """
        return len(self._assignments.get(date, ()))

    def has_tended(self, podling_id):
        """
        Indicate whether the shepherd has ever been assigned the specified
        podling.
        """
        # Look inside the per-date podling sets.  Iterating the dict itself
        # would yield the date strings and turn this into a substring test
        # against dates.
        return any(podling_id in tended
                   for tended in self._assignments.values())

    def whitelisted(self, podling_id):
        """
        Indicate whether the shepherd is willing to accept the specified
        podling, i.e. whether it appears in the shepherd's whitelist.
        Always False for a shepherd without a whitelist.
        """
        return bool(self._whitelist) and podling_id in self._whitelist

    def get_apache_id(self):
        """Return the Shepherd's apache id."""
        return self._apache_id

    def get_max_podlings(self):
        """Return the maximum number of podlings per report date."""
        return self._max_podlings

    @staticmethod
    def bulk_load(f):
        """
        Parse a JSON file stream and return a dict of (apache_id: Shepherd)
        pairs.

        The stream must hold a list of objects whose keys are passed as
        keyword arguments to the Shepherd constructor.
        """
        shepherds = {}
        for args in json.load(f):
            shepherd = Shepherd(**args)
            shepherds[shepherd.get_apache_id()] = shepherd
        return shepherds
class Report(object):
    """An Incubator report to the ASF Board of Directors for a given date."""

    def __init__(self, date):
        """
        Return a Report instance.

        * date -- A string of the format `YYYY-MM`.

        Raise ValueError if `date` does not have that format.
        """
        # Anchor the pattern and restrict the month to 01-12 so that input
        # such as "2013-055" or "2013-13" is rejected instead of silently
        # producing an out-of-range month number.
        if not re.match(r"\d{4}-(0[1-9]|1[0-2])\Z", date):
            raise ValueError("Invalid date")
        self._date = date
        self._month = int(date[5:])
        # Maps a podling ID to its assigned Shepherd (or None).
        self._assignments = {}

    def get_date(self):
        """Return the report date as a `YYYY-MM` string."""
        return self._date

    def assign(self, podling_id, shepherd):
        """
        Record `shepherd` (a Shepherd or None) for `podling_id`.

        Raise TypeError if `shepherd` is neither None nor a Shepherd, and
        ValueError if the podling already has an entry in this report.
        """
        if shepherd is not None and not isinstance(shepherd, Shepherd):
            raise TypeError("Not a Shepherd")
        if podling_id in self._assignments:
            raise ValueError("Podling " + podling_id + " already assigned")
        self._assignments[podling_id] = shepherd

    def shepherd(self, podling_id):
        """Return the Shepherd assigned to `podling_id`, if any."""
        return self._assignments.get(podling_id)

    def podlings(self):
        """Return the podlings reporting this cycle as a `set` of IDs"""
        return set(self._assignments)

    def _select_shepherd(self, podling, shepherds):
        """
        Pick a shepherd for `podling` from `shepherds`, or return None when
        nobody is available.
        """
        podling_id = podling.get_id()

        # Exclude mentors and inactive shepherds (max_podlings of 0).
        candidates = [
            shep for shep in shepherds.values()
            if shep.get_max_podlings()
            and not podling.has_mentor(shep.get_apache_id())
        ]

        # Try to distribute podlings evenly amongst the shepherds: the
        # shuffle randomizes ties, the stable sort puts the least loaded
        # shepherds first.
        random.shuffle(candidates)
        candidates.sort(key=lambda shep: shep.podling_count(self._date))

        # First, try to assign the podling to someone who's got it
        # whitelisted.
        for shep in candidates:
            if shep.whitelisted(podling_id):
                if shep.accept(podling_id=podling_id, date=self._date):
                    return shep

        # Try to assign the podling to someone who hasn't shepherded it
        # before.
        for shep in candidates:
            if shep.has_tended(podling_id):
                continue
            if shep.accept(podling_id=podling_id, date=self._date):
                return shep

        # Find someone who's got the time.
        for shep in candidates:
            if shep.accept(podling_id=podling_id, date=self._date):
                return shep

        # Nobody's available.
        return None

    def assign_shepherds(self, podlings, shepherds, reports):
        """
        Assign shepherds to this report.

        * podlings -- a dict of (podling_id: Podling) pairs.
        * shepherds -- a dict of (apache_id: Shepherd) pairs.
        * reports -- a dict of ("YYYY-MM": Report) pairs (currently unused).

        Every podling with a report due this month gets an entry; the
        entry is None when no shepherd could be found.
        """
        shuffled = list(podlings.values())
        random.shuffle(shuffled)
        for podling in shuffled:
            if not podling.report_due(self._month):
                continue
            shep = self._select_shepherd(podling, shepherds)
            if shep is not None:
                shep.assign(date=self._date, podling_id=podling.get_id())
            self.assign(podling_id=podling.get_id(), shepherd=shep)

    @staticmethod
    def bulk_load(shepherds, f):
        """
        Parse a JSON file stream and return a dict of ("YYYY-MM": Report)
        pairs.

        As a side effect, update `shepherds` by assigning podlings from past
        reports.

        * shepherds: A dict of (apache_id: Shepherd) pairs.
        * f: A readable file stream.
        """
        data = json.load(f)
        reports = {}
        for date, assigned in data.items():
            report = reports[date] = Report(date=date)
            for podling_id, shepherd_id in assigned.items():
                if shepherd_id and shepherd_id not in shepherds:
                    # Add past shepherds to roster, but indicate that they
                    # are inactive by giving them max_podlings=0.
                    shepherds[shepherd_id] = Shepherd(apache_id=shepherd_id,
                                                      max_podlings=0)
                shepherd = shepherds[shepherd_id] if shepherd_id else None
                report.assign(podling_id=podling_id, shepherd=shepherd)
                if shepherd:
                    # Bypass capacity checks: this is recorded history.
                    shepherd.force_assign(date=date, podling_id=podling_id)
        return reports

    @staticmethod
    def bulk_dump(reports, f):
        """
        Write out a dict of ("YYYY-MM": Report) pairs to a JSON file stream,
        capturing shepherd assignments.

        Each report becomes a (podling_id: apache_id) mapping, with null for
        podlings that have no shepherd.
        """
        data = {}
        for report in reports.values():
            assigned = {}
            for podling_id in report.podlings():
                shepherd = report.shepherd(podling_id)
                assigned[podling_id] = (shepherd.get_apache_id()
                                        if shepherd else None)
            data[report.get_date()] = assigned
        json.dump(data, f, indent=4, sort_keys=True, separators=(",", ": "))
class Podling(object):
    """An Incubator podling."""

    def __init__(self, podling_id, group, monthly):
        """
        Return a Podling instance.

        * podling_id -- The resource identifier for the podling.
        * group -- Reporting group (1, 2 or 3).
        * monthly -- Whether podling currently reports monthly.
        """
        self._id = podling_id
        self._monthly = monthly
        self._group = group
        self._mentors = set()

    def add_mentor(self, mentor):
        """Add a mentor to the podling."""
        self._mentors.add(mentor)

    def has_mentor(self, apache_id):
        """Indicate whether `apache_id` mentors the podling."""
        return apache_id in self._mentors

    def get_id(self):
        """Return the podling's string resource identifier."""
        return self._id

    def report_due(self, month):
        """
        Indicate whether the podling will have a report due during the
        specified month.

        Monthly podlings always report.  Others report when the month maps
        onto their group: months 1, 4, 7, 10 map to group 1; 2, 5, 8, 11 to
        group 2; 3, 6, 9, 12 to group 3.
        """
        if self._monthly:
            return True
        return ((month - 1) % 3) + 1 == self._group

    @staticmethod
    def bulk_load(f):
        """
        Parse a podlings.xml file stream and return a dict of
        (podling_id: Podling) pairs.

        Only podlings whose status is 'current' are loaded.  Raise
        Exception if such a podling has no 'reporting' element.
        """
        podlings = {}
        dom = xml.dom.minidom.parse(f)
        for row in dom.getElementsByTagName("podling"):
            if row.getAttribute("status") != 'current':
                continue

            # Normalize the name into an ID: trimmed, lower case, no spaces.
            podling_id = row.getAttribute("name").strip()
            podling_id = podling_id.lower().replace(' ', '')

            reporting = row.getElementsByTagName("reporting")
            if not reporting:
                raise Exception(
                    "podlings.xml is missing 'reporting' for " + podling_id
                )

            # getAttribute() returns "" for a missing attribute.  Treat an
            # explicit negative value as "not monthly" too; a plain
            # truthiness test would read monthly="false" as monthly.
            monthly_attr = reporting[0].getAttribute("monthly")
            monthly = monthly_attr.strip().lower() not in (
                "", "false", "no", "0"
            )
            group = int(reporting[0].getAttribute("group"))

            podling = Podling(podling_id=podling_id, monthly=monthly,
                              group=group)
            podlings[podling_id] = podling
            for mentor_data in row.getElementsByTagName("mentor"):
                mentor_name = mentor_data.getAttribute("username").strip()
                podling.add_mentor(mentor_name)
        return podlings
def repos_root():
    """
    Return the directory that contains this script, which is taken to be
    the root dir of the Incubator version control checkout.
    """
    script_path = os.path.abspath(__file__)
    checkout_dir, _script_name = os.path.split(script_path)
    return checkout_dir
def main():
    """
    Assign shepherds for the requested report month and save the result.

    Loads `content/podlings.xml`, `content/shepherds.json` and
    `content/shepherd_assignments.json` from the checkout, then rewrites
    the assignments file with the new month added.  Exits with status 0
    without changing anything if the month already has assignments.
    """
    # Local import: only this function needs an in-memory text buffer.
    import io

    # Process arguments and load data.
    options = process_cli_args()
    content_dir = os.path.join(repos_root(), 'content')
    podlings_xml_path = os.path.join(content_dir, 'podlings.xml')
    shepherds_path = os.path.join(content_dir, 'shepherds.json')
    assignments_path = os.path.join(content_dir, 'shepherd_assignments.json')

    # Binary mode lets the XML parser apply the document's own encoding
    # declaration instead of depending on the locale's default encoding.
    with open(podlings_xml_path, 'rb') as f:
        podlings = Podling.bulk_load(f=f)
    # Read the JSON data as UTF-8 regardless of the locale.
    with open(shepherds_path, 'r', encoding='utf-8') as f:
        shepherds = Shepherd.bulk_load(f=f)
    with open(assignments_path, 'r', encoding='utf-8') as f:
        reports = Report.bulk_load(shepherds=shepherds, f=f)

    # See whether the assignments have already been made for the given month.
    if options.date in reports:
        print("Assignments for {} already complete.".format(options.date))
        sys.exit(0)

    # Perform assignments and dump to `content/shepherd_assignments.json`.
    report = reports[options.date] = Report(date=options.date)
    report.assign_shepherds(podlings=podlings,
                            shepherds=shepherds,
                            reports=reports)

    # Serialize in memory first, so that a failure while dumping cannot
    # leave the assignments file deleted or half-written.
    serialized = io.StringIO()
    Report.bulk_dump(reports=reports, f=serialized)

    # The existing file is only removed and recreated once the new content
    # is ready.
    os.remove(assignments_path)
    with open(assignments_path, 'w', encoding='utf-8') as f:
        f.write(serialized.getvalue())
    print("Updated {}".format(assignments_path))
def process_cli_args(argv=None):
    """
    Parse command line arguments and work out the report date.

    * argv -- Optional list of argument strings; when None, argparse reads
      the process's command line.

    Return the argparse namespace with three attributes set:

    * month -- The report month (1-12).  When `--month` is omitted (or 0),
      the month following the current one is used.
    * year -- The current year, or the next year if `month` is earlier
      than the current month.
    * date -- The report date as a `YYYY-MM` string.

    Exit with a usage error if `--month` is outside 0-12.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--month', type=int, default=0,
                        help="month number (1-12)")
    options = parser.parse_args(argv)

    # 0 is the "not specified" sentinel; anything else must be a real month,
    # otherwise a bogus date such as "2024-13" would be produced.
    if not 0 <= options.month <= 12:
        parser.error("--month must be between 1 and 12")

    now = datetime.datetime.now()
    if options.month == 0:
        # Default to next month, wrapping December around to January.
        options.month = (now.month % 12) + 1

    # A month that has already passed this year refers to next year.
    if options.month < now.month:
        options.year = now.year + 1
    else:
        options.year = now.year
    options.date = "{0:04d}-{1:02d}".format(options.year, options.month)
    return options
# Script entry point: perform the assignment only when run directly.
if __name__ == "__main__":
    main()