blob: 135b60e946892a8cd574d7c97fccfecdb3aa89a0 [file] [log] [blame]
"""
Module to give access to data from committee-info.json
This module acts as the gatekeeper for all access to committee-info.json
which is cached from https://whimsy.apache.org/public/committee-info.json
"""
import sys
# Refuse to load under Python 2: the rest of the module uses Python 3-only
# APIs (e.g. urllib.request).
if sys.version_info[0] < 3:
    raise ImportError("This script requires Python 3")
import os
from os.path import dirname, abspath, join
from inspect import getsourcefile
import urllib.request
import time
import calendar
import json
# Directory containing this source file, derived from the file's own location
MYHOME = dirname(abspath(getsourcefile(lambda:0))) # automatically work out home location so can run the code anywhere
# we assume that this script is located one level below the top
COMDEV_HOME=dirname(MYHOME)
# Directory holding the cached download: <COMDEV_HOME>/data/cache
CACHE_DIR=join(COMDEV_HOME,'data','cache')
# Remote location of the committee data
URL='https://whimsy.apache.org/public/committee-info.json'
# File name used for the cached copy inside CACHE_DIR
NAME='committee-info.json'
# Full path of the cached copy
FILE=join(CACHE_DIR, NAME)
# NOTE(review): this prints the cache path every time the module is imported - looks like debug output; confirm it is wanted
print(FILE)
INTERVAL=300 # code won't recheck for updated HTTP file until this number of seconds has elapsed
# time format used in Last-Modified/If-Modified-Since HTTP headers
HTTP_TIME_FORMAT = '%a, %d %b %Y %H:%M:%S GMT'
cidata = {} # The data read from the file (replaced by update_cache())
# get file mod date in suitable format for If-Modified-Since
def mod_date(t):
    """
    Format a POSIX timestamp as an HTTP date string.

    The result has the form 'Thu, 01 Jan 1970 00:00:00 GMT', suitable for
    use in an If-Modified-Since request header.

    t: seconds since the epoch (int or float), interpreted as UTC.
    Returns the formatted date string.
    """
    # Function-scope import keeps this block self-contained.
    from email.utils import formatdate
    # formatdate always emits English day/month names, whereas
    # time.strftime('%a'/'%b') follows the current LC_TIME locale and could
    # produce a header that HTTP servers do not understand.
    return formatdate(t, usegmt=True)
# get file mod_date
def file_mtime(filename):
    """
    Return the modification time of filename in seconds since the epoch.

    If the file does not exist, -1 is returned instead of raising, so that
    callers can tell "no file" apart from a real timestamp.
    """
    try:
        return os.path.getmtime(filename)
    except FileNotFoundError:
        # distinguish from no modTime in http response
        return -1
# download url as file if the cached copy is too old
def get_url_if_newer(url, dir, name):
    """
    Download url to dir/name unless the cached copy is still current.

    A marker file dir/.checked_<name> records when the server was last
    asked. If the cached file exists and the marker is younger than INTERVAL
    seconds, no request is made at all. Otherwise a conditional GET
    (If-Modified-Since) is sent; a 304 reply leaves the cached file alone.
    A new download is written to a temporary file, stamped with the server's
    Last-Modified time when one is supplied, and then moved into place.

    url:  location to fetch
    dir:  directory that holds the cached file and the marker file
    name: file name of the cached copy inside dir

    Returns None.
    Raises urllib.error.HTTPError for any HTTP error status other than 304.
    """
    path = join(dir, name)
    fileTime = file_mtime(path)
    check = join(dir, ".checked_" + name)
    if fileTime >= 0:
        checkTime = file_mtime(check)
        now = time.time()
        if checkTime > (now - INTERVAL):
            print("Recently checked %d\n%d\n%d, skip check" % (INTERVAL, checkTime, now))
            return
        print("Not recently checked\n%d\n%d" % (checkTime, now))
        # Only make the request conditional when there is a cached copy
        headers = {"If-Modified-Since": mod_date(fileTime)}
    else:
        print("Not found %s" % name)
        # No cached copy: the -1 sentinel is not a real timestamp (and cannot
        # be formatted on every platform), so ask for the file unconditionally
        headers = {}

    req = urllib.request.Request(url, headers=headers)
    outFile = path + ".tmp"
    try:
        # The context manager closes the HTTP response even if writing fails
        with urllib.request.urlopen(req) as response:
            lastMod = response.headers['Last-Modified']
            lastModT = None
            if lastMod is not None:
                lastModT = calendar.timegm(time.strptime(lastMod, HTTP_TIME_FORMAT))
            with open(outFile, 'wb') as f:
                f.write(response.read())
        if lastModT is not None:
            # store the last mod time as the time of the file
            os.utime(outFile, times=(lastModT, lastModT))
        # os.replace overwrites an existing destination on every platform;
        # os.rename raises on Windows when the cached file already exists
        os.replace(outFile, path)
        print("Downloaded new version of %s " % path)
    except urllib.error.HTTPError as err:
        if err.code != 304:
            raise
        print("Cached copy of %s is up to date" % path)

    # touch the marker file (creating it if necessary) to record this check
    with open(check, 'a'):
        os.utime(check, None)
def update_cache():
    """
    Refresh the cached committee-info file if needed and load it.

    Calls get_url_if_newer() for URL, then parses FILE as UTF-8 JSON and
    stores the result in the module-level cidata.
    """
    global cidata  # assign the module-level name rather than a new local
    get_url_if_newer(URL, CACHE_DIR, NAME)
    with open(FILE, "r", encoding='utf-8') as cached:
        cidata = json.load(cached)
# Refresh (if necessary) and load the committee data as soon as the module is imported
update_cache() # done when loading
def chairs():
    """
    Map each PMC's full name to the name of its chair.

    Returns a dict of the form {'Apache <display_name>': '<chair name>'}.
    Entries whose 'pmc' flag is false are skipped, as are PMCs with no chair
    listed. If several chairs are listed only the first one is used.
    """
    result = {}
    for info in cidata['committees'].values():
        if not info['pmc']:
            continue
        holders = info['chair']
        # allow for multiple chairs: take the first key, or None if there are none
        first = next(iter(holders), None)
        label = 'Apache %s' % info['display_name']
        if first:
            result[label] = holders[first]['name']
    return result
def cycles():
    """
    Map each PMC id to its reporting schedule (the 'report' value).

    Entries whose 'pmc' flag is false are skipped. Two ids are additionally
    published under a second key so that existing code can find them:
    'ws' also appears as 'webservices' and 'httpd' as 'http server'.
    """
    # Duplicate some entries for now so the code can find them (the existing json has the duplicates)
    aliases = {'ws': 'webservices', 'httpd': 'http server'}
    schedule = {}
    for cid, info in cidata['committees'].items():
        if not info['pmc']:
            continue
        months = info['report']
        schedule[cid] = months
        if cid in aliases:
            schedule[aliases[cid]] = months
    return schedule
"""
committees() returns a dict keyed by committee id, with entries of the form:
"abdera": {
"fullname": "Apache Abdera",
"mail_list": "abdera",
"established": "2008-11",
"report": [
"February",
"May",
"August",
"November"
],
"reporting": 2,
"chair": {
"nick": "antelder",
"name": "Ant Elder"
},
"pmc": true
},
"""
def committees():
    """
    Return a dict keyed by committee id describing every committee.

    Each value is a copy of the committee's source entry with these changes:
      - 'display_name' is replaced by 'fullname' ("Apache <display_name>",
        with 'Perl' shown as 'mod_perl')
      - 'chair' becomes {'nick': ..., 'name': ...}, or None if no chair is
        listed (if several are listed, the last one wins)
      - 'established' is rewritten from mm/yyyy to yyyy-mm (empty values are
        passed through unchanged)
      - 'report' is kept, and 'reporting' is added as 1, 2 or 3 when the
        schedule contains January, February or March respectively, or 0 for
        'Every month'; otherwise 'reporting' is not set
    All other keys are copied as they are.
    """
    # Checked in this order; the first marker found in 'report' decides 'reporting'
    reporting_markers = (('January', 1), ('February', 2), ('March', 3), ('Every month', 0))
    result = {}
    for cid, source in cidata['committees'].items():
        entry = {}
        for key, raw in source.items():
            # some keys need special processing
            if key == 'display_name':
                entry['fullname'] = "Apache %s" % ('mod_perl' if raw == 'Perl' else raw)
            elif key == 'chair':
                entry['chair'] = None
                for nick in raw:
                    entry['chair'] = {'nick': nick, 'name': raw[nick]['name']}
            elif key == 'established':
                # extract year and month
                entry[key] = "%s-%s" % (raw[3:7], raw[0:2]) if raw else raw
            elif key == 'report':
                entry[key] = raw  # save original values
                for marker, slot in reporting_markers:
                    if marker in raw:
                        entry['reporting'] = slot
                        break
            else:
                entry[key] = raw
        result[cid] = entry
    return result
def pmcdates():
    """
    Return establishment and roster join dates for every PMC.

    The result is a dict keyed by committee id; each value has the form
        {'pmc': [established, timestamp],
         'roster': {member_id: [name, timestamp], ...}}
    where 'established' is the original string (or None) and each timestamp
    is in seconds since the epoch (UTC). A date that is missing or cannot be
    parsed yields a timestamp of 0; an unparseable 'established' value is
    also reported on stdout. Entries whose 'pmc' flag is false are skipped.
    The 'ws' entry is additionally available under the key 'webservices'.
    """
    dates = {}
    cttes = cidata['committees']
    for ent in cttes:
        ctte = cttes[ent]
        if not ctte['pmc']:
            continue
        roster = ctte['roster']
        est = ctte['established']
        established = 0
        if est is not None:
            # convert mm/yyyy to date (drop any subsequent text)
            try:
                established = calendar.timegm(time.strptime(est[0:7], '%m/%Y'))
            except Exception as e:
                print("Date parse error for %s: %s %s" % (ent, est, e))
        members = {}
        for uid in roster:
            rid = roster[uid]
            try:
                joined = calendar.timegm(time.strptime(rid['date'], '%Y-%m-%d'))
            except (KeyError, TypeError, ValueError):
                # missing, non-string or malformed date; a bare except here
                # would also have swallowed KeyboardInterrupt/SystemExit
                joined = 0
            members[uid] = [rid['name'], joined]
        dates[ent] = {'pmc': [est, established], 'roster': members}
        # The 'CI' internal name for Web Services is 'ws' but reporter code originally used 'webservices'
        if ent == 'ws':
            dates['webservices'] = dates[ent]
    return dates