clutch.py - incubator - Git at Google

 #!/usr/bin/env python

 # Licensed to the Apache Software Foundation (ASF) under one or more
 # contributor license agreements.  See the NOTICE file distributed with
 # this work for additional information regarding copyright ownership.
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.

 '''
 Purpose: Clutch gathers details about projects currently in incubation.

 The core resource is the SITE_CONTENT/podlings.xml file. As soon as a project is
 accepted into incubation, please add its entry.
 This script reads the SITE_CONTENT/podlings.xml table, and each podling status page, and
 other resources. The assembled metadata is stored in various data files.

 See further explanation at http://incubator.apache.org/clutch.html

 Note: Please keep the dependencies as minimal as possible, so this script can
 be operated by any Incubator committer. It uses only standard modules.

 Note: The 'svn log' queries might only run on UNIX, YMMV.
 '''

 '''
 External input data files used:
 - SITE_CONTENT/podlings.xml

 URLs
 http://people.apache.org/~crossley/incubator-keys.txt

 Created on minotaur using:
 find /www/www.apache.org/dist/incubator \
   -iname "*KEYS*" | grep -v "\.svn\/" > ~/public_html/incubator-keys.txt

 http://people.apache.org/~crossley/incubator-releases.txt

 Created on minotaur using:
 find /www/www.apache.org/dist/incubator \
   -iname "*incubat*gz.asc" -o -iname "*incubat*gz.sig" \
   -o -iname "*incubat*bz2.asc" -o -iname "*incubat*bz2.sig" \
   -o -iname "*incubat*zip.asc" -o -iname "*incubat*zip.sig" \
   > ~/public_html/incubator-releases.txt

 http://people.apache.org/~crossley/incubator-releases-bad-filename.txt

 Created on minotaur using:
 find /www/www.apache.org/dist/incubator \
   -iname "*gz.asc" -o -iname "*gz.sig" \
   -o -iname "*bz2.asc" -o -iname "*bz2.sig" \
   -o -iname "*zip.asc" -o -iname "*zip.sig" \
   | sed 's/.*\/incubator\///' \
   | grep -v incubat \
   > ~/public_html/incubator-releases-bad-filename.txt

 The above has now been replaced by parsing the output of
 'svn', 'ls', '-R', 'https://dist.apache.org/repos/dist/release/incubator'

 asf-authorization-template from Git deployment branch
 http://mail-archives.apache.org/mod_mbox/
 http://www.apache.org/dist/incubator/<resource>
 http://svn.apache.org/repos/asf/incubator

 SVN commands
 'svn', 'ls', '-R', 'https://dist.apache.org/repos/dist/release/incubator'
 'svn', 'ls', '--xml', 'http://svn.apache.org/repos/asf/incubator/'
 'svn', 'log', '--xml', 'SITE_CONTENT/projects/{0}.xml' {status file}

 Output data files created:
 SITE_CONTENT/clutch.txt
 SITE_CONTENT/clutcho1.ent
 SITE_CONTENT/clutcht.ent
 SITE_CONTENT/clutchr1.ent
 SITE_CONTENT/clutchr2.ent
 SITE_CONTENT/clutcho2.ent
 SITE_CONTENT/clutchm.ent
 SITE_CONTENT/clutchmy.ent
 SITE_CONTENT/report_due_1.txt
 SITE_CONTENT/report_due_2.txt
 SITE_CONTENT/report_due_3.txt

 Pickle file:
 - clutch.pkl (I/O)
 '''

 # FIXME: Mail list detection could be improved.
 # FIXME: Mail list detection. See svn comments with 2009-11-13 rush bug fix.
 # FIXME: Occasional trailing slash issue in Clutch cache.
 # FIXME: Some projects use different names in different contexts, and cannot
 #        be automatically handled, e.g. Lucene.Net, log4php (some of their stats
 #        are missing).
 #        See beginning attempt to handle this with "resourceNames".
 # FIXME: Perhaps send some error reporting to a log file:
 #        - validate the dates.
 #        - detect short description, e.g. Hama = Hama
 # FIXME: Better/more exception handling, e.g. url open
 # FIXME: Need various output formats:
 #        - source docs xml file in clutch*.ent (now happening)
 #        - simple text list of project names and basic data clutch.txt (now happening)
 #        - Notation3 or DOAP or RDFa or some such? (not yet)
 #        - python pickle (now happening)
 # FIXME: Parse Robert's "audit" stuff.
 # FIXME: Detect if they have SVN repo yet.
 #        - http://svn.apache.org/repos/asf/incubator/* ensure more than ".."
 # FIXME: Similarly with website. Ensure that there is some content length.
 # FIXME: Get better hints from Status pages, e.g. sometimes they don't link
 #        to their "tracker" etc. they just use text.
 # FIXME: News parser gets extra committer if source has commented xml template.
 # FIXME: Use fragments via other files for the sets of html notes.
 # FIXME: See some other suggestions on the general@ list.
 # FIXME: See some other suggestions in clutch.html#notes-2
 # FIXME: Better deal with input/output/unicode.
 # FIXME: See some other suggestions in issue INCUBATOR-78.

 import sys
 if sys.version_info < (3, 2):
     raise Exception("Python 3.2 or above is required")

 import subprocess
 from subprocess import Popen, PIPE
 import datetime
 from html.parser import HTMLParser
 import os.path
 import pickle
 import pprint
 import re
 import urllib.request
 import urllib.error
 import urllib.parse
 import xml.dom.minidom
 import argparse
 import io

 # constants for external data ---
 GIT = 'https://git-wip-us.apache.org/repos/asf?p=infrastructure-puppet.git;hb=refs/heads/deployment;a=blob_plain;f=modules/subversion_server/files/authorization/%s'
 ASF = 'asf-authorization-template'
 # PIT='pit-authorization-template'

 MAIL_LIST_URL = "http://mail-archives.apache.org/mod_mbox/"

 # Constant for site content location ---

 SITE_CONTENT_DIR = 'content/'

 parser = argparse.ArgumentParser(
     description='Gather details about projects currently in incubation.')
 parser.add_argument('--ignoreState',    action='store_true',
                     default='False', help='Ignore state (default false)')
 parser.add_argument('-v', '--verbose',  action='store_true',
                     default='False', help='verbose mode (default false)')
 parser.add_argument('-q', '--quiet',    action='store_true',
                     default='False', help='quiet mode (default false)')
 parser.add_argument('-x', '--external', action='store_true', default='False',
                     help='log external requests (e.g. svn, http) (default false)')
 args = parser.parse_args()

 # Normal level of info
 optionInfo = args.quiet != True

 # Issue some extra debug information.
 optionVerbose = args.verbose == True
 if optionVerbose:
     optionInfo = True

 # Use the persistent data to speed operations.
 # Occasionally bad data is cached (e.g. experimenting with developing new code).
 # So need to ignore the cached data and perform all resource availability
 # tests.
 optionUseClutchState = args.ignoreState != True

 # Should we log external requests?
 optionExternal = args.external == True

 # Utility functions ----


 def logexternal(string):
     if optionExternal:
         print("External: " + string)


 def getUrl(url, encoding=None, errors=None):
     logexternal(url)
     # ensure invalid URLs don't cause long wait
     resp = urllib.request.urlopen(url, timeout=5)
     if encoding:
         return io.TextIOWrapper(resp, encoding=encoding, errors=errors)
     else:
         return resp


 def osExec(list):
     logexternal(" ".join(list))
     return subprocess.Popen(list, stdout=subprocess.PIPE).communicate()[0]


 def osPopen(list):
     logexternal(" ".join(list))
     return subprocess.Popen(list, stdout=subprocess.PIPE, universal_newlines=True)


 def getText(nodelist):
     """http://www.python.org/doc/2.5.2/lib/minidom-example.txt"""
     rc = ""
     for node in nodelist:
         if node.nodeType == node.TEXT_NODE:
             rc = rc + node.data
     return rc


 def normaliseSVNurl(url):
     rc = url.replace('https://', 'http://')
     if not rc[-1] == '/':
         rc = rc + '/'
     return rc

 def checkStatus(k, projectList, status):
     statusFile = SITE_CONTENT_DIR + "projects/{0}.xml".format(k)
     e = projectList[k]
     if os.path.exists(statusFile):
         try:
             dom = xml.dom.minidom.parse(statusFile)
             span = dom.getElementsByTagName("span")
             if (len(span) < 1):
                 print("INFO: Missing from status file: "+statusFile)
                 print("      <p><span class='{2}'>The {0} project {2} on {1}</span></p>".format(e['name'], e['enddate'], status))
         except (Exception) as e:
             print("Exception processing " + statusFile + " : " + str(e))
             raise
     else:
         print("WARN: Cannot find {0}".format(statusFile))

 projects = {}  # internal data, derived from podlings.xml
 otherIssues = []
 persist = {}  # persistent data to be utilised by other tools
 mentorsProjects = {}  # internal data

 gatherDate = datetime.datetime.utcnow()
 gatherDateString = datetime.datetime.utcnow().ctime()
 delta = datetime.timedelta(days=61)
 statusTallyDate1 = gatherDate - delta
 delta = datetime.timedelta(days=122)
 statusTallyDate2 = gatherDate - delta
 delta = datetime.timedelta(days=273)
 statusTallyDate3 = gatherDate - delta

 # Regualar expressions ---
 # These expressions are used often, so compile them early.
 startDateRE = re.compile("([0-9]+)-0?([0-9]+)-?0?([0-9]+)?")
 statusLogRE = re.compile("^([0-9]+)-0?([0-9]+)-0?([0-9]+)")
 svnRevisionSkipRE = re.compile(
     "707389|708087|708420|708791|709356|709648|711153|744365|761864|788239|796085|804825|894972|940767|959869|1065888|1153764|1159079|1373730|1479744|1494479|1515212")
 mailListRE = re.compile("^([-a-z0-9]+)@([a-z]+)\.apache\.org")
 mailListNameRE = re.compile("^([a-z]+)-([-a-z0-9]+)")
 mailListNameUrlRE = re.compile("/([a-z]+)-([-a-z0-9]+)/$")
 urlHttpRE = re.compile("^http")
 newCommitterRE = re.compile("[nN]ew [cC]omm?itt?ers? ?\(?([0-9]+)?")
 distMirrorRE = re.compile("cgi/incubator/([-a-z0-9]+)/")

 # Import the persistent data ---
 # This enables us to skip detection of website etc. if already detected.
 inputFile = open('clutch.pkl', 'rb')
 state = pickle.load(inputFile)
 inputFile.close()

 # Parse the podlings data file ---
 dom = xml.dom.minidom.parse(SITE_CONTENT_DIR + "podlings.xml")

 graduatedProjects = {}
 graduatingOrRetiring = []
 retiredProjects = {}

 print("Gather data from podlings.xml ...")
 for row in dom.getElementsByTagName("podling"):
     name = row.getAttribute("name").strip()
     id = name.lower()
     id = id.replace(' ', '')  # strip spaces from project ID
     startDate = row.getAttribute("startdate")
     endDate = row.getAttribute("enddate")

     if row.getAttribute("status") == 'graduated':
         resource = row.getAttribute("resource")
         graduatedProjects[resource.lower()] = {'name': name, 'enddate': endDate}

     if row.getAttribute("status") == 'retired':
         resource = row.getAttribute("resource")
         retiredProjects[resource.lower()] = {'name': name, 'enddate': endDate}

     if row.getAttribute("status") == 'current':
         #print("Name: " + name)

         if id in projects:
             print("ERROR: {0}: row exists".format(id))
         else:
             projects[id] = {}
             # strip spaces from project name (as per original ReportingSchedule)
             # TODO is this still needed? Or should the @name attribute not
             # contain spaces?
             projects[id]['fullName'] = name
             projects[id]['name'] = name.replace(' ', '')
             # Set some defaults
             needMetadata = False
             projects[id]['reportingMonthly'] = False
             projects[id]['reportingComments'] = ""
             projects[id]['hasReportingGroup'] = True
             # currently needed for reporting phase
             projects[id]['reportingGroup'] = 'month'
             projects[id]['hasStatusEntry'] = True
             projects[id]['statusFileName'] = id
             projects[id]['statusLastUpdated'] = ""
             projects[id]['statusAge'] = 0
             projects[id]['statusUpdateCounts'] = ""
             projects[id]['urlSvn'] = ""
             projects[id]['urlTracker'] = ""
             projects[id]['urlWww'] = ""
             projects[id]['urlDist'] = ""
             projects[id]['urlKeys'] = ""
             projects[id]['hasEntryIssues'] = False
             projects[id]['resourceNames'] = [id]
             # Some projects use an alternate short resource name
             # rather than their project name
             alias = row.getAttribute("resource")
             if (alias != '' and alias != id):
                 projects[id]['resourceNames'].append(alias)
             for alias in row.getAttribute("resourceAliases").split(','):
                 if alias != '':
                     projects[id]['resourceNames'].append(alias)
             projects[id]['entryDate'] = None
             projects[id]['committersSvn'] = None
             projects[id]['hintMailListDev'] = ""
             projects[id]['hasMailListDev'] = ""
             projects[id]['hintMailListCommits'] = ""
             projects[id]['hasMailListCommits'] = ""
             projects[id]['numberCommitters'] = 0
             projects[id]['numberCommittersNew'] = 0

             projects[id]['hasClutchState'] = id in state
             descElements = row.getElementsByTagName("description")
             projects[id]['description'] = getText(descElements[0].childNodes)
             if 'FIXME' in projects[id]['description']:
                 needMetadata = True
             projects[id]['sponsor'] = row.getAttribute("sponsor")
             projects[id]['startDate'] = startDate
             projects[id]['statusFileName'] = row.getAttribute("resource")
             mentors = [mentor.firstChild.data.strip()
                        for mentor in row.getElementsByTagName("mentor")]
             projects[id]['mentors'] = mentors
             if 'FIXME' in mentors:
                 needMetadata = True
             if needMetadata:
                 errorMsg = "{0}: Need to add incubation metadata.".format(id)
                 print('ERROR:', errorMsg)
                 errorMsg += " Please maintain your records in the content/podlings.xml file. See <a href=\"#h-hasStatusEntry\">help</a>."
                 otherIssues.append(errorMsg)

             # determine projects for each mentor
             for mentor in mentors:
                 try:
                     mentorsProjects[mentor]
                 except KeyError:
                     mentorsProjects[mentor] = []
                 mentorsProjects[mentor].append(name)

             isGraduating = row.getElementsByTagName("graduating").length > 0
             if isGraduating:
                 graduatingOrRetiring.append(id)
                 if not row.getAttribute("endDate"):
                     errorMsg = "{0}: Has graduated, but still needs to follow the graduation steps.".format(
                         id)
                     print('ERROR:', errorMsg)
                     errorMsg += " See <a href=\"#h-Graduate\">help</a>."
                     otherIssues.append(errorMsg)

             isRetiring = row.getElementsByTagName("retiring").length > 0
             if isRetiring:
                 graduatingOrRetiring.append(id)
                 if not row.getAttribute("endDate"):
                     errorMsg = "{0}: Has retired, but still needs to follow the retirement steps.".format(
                         id)
                     print('ERROR:', errorMsg)
                     errorMsg += " See <a href=\"#h-Retire\">help</a>."
                     otherIssues.append(errorMsg)

             # Is it reporting monthly?
             reporting = row.getElementsByTagName("reporting")
             if reporting.length != 1:
                 projects[id]['hasReportingGroup'] = False
                 if not isGraduating:
                     print(
                         "ERROR: {0}: expecting a single reportgroup".format(name))
             else:
                 if reporting[0].getAttribute("monthly") == 'true':
                     projects[id]['reportingMonthly'] = True
                     projects[id]['reportingComments'] = getText(reporting)
                     projects[id]['hasEntryIssues'] = True
                 group = reporting[0].getAttribute("group")
                 if group == None:
                     print("ERROR: {0}: missing group attribute".format(name))
                     projects[id]['hasReportingGroup'] = False
                 else:
                     projects[id]['reportingGroup'] = 'group-' + group

 dom.unlink()

 for k in sorted(graduatedProjects):
     checkStatus(k, graduatedProjects, 'graduated')
 for k in sorted(retiredProjects):
     checkStatus(k, retiredProjects, 'retired')

 # Process the incubation table data, detect some potential issues. ---

 print("Gather details from project status files ...")
 projectNames = list(projects.keys())
 for k in sorted(projectNames, key=str.lower):
     if optionVerbose:
         print("DEBUG: Processing status file for {0}".format(k))

     # Append more potential alternate names for a project
     if projects[k]['statusFileName'] not in projects[k]['resourceNames']:
         projects[k]['resourceNames'].append(projects[k]['statusFileName'])
     if optionVerbose and len(projects[k]['resourceNames']) > 1:
         print("DEBUG: Will try alternate names: {0}".format(
             projects[k]['resourceNames']))

     # parse their project status file to extract specific information
     statusFile = SITE_CONTENT_DIR + \
         "projects/{0}.xml".format(projects[k]['statusFileName'])
     if os.path.exists(statusFile):
         try:
             dom = xml.dom.minidom.parse(statusFile)
         except (Exception) as e:
             print("Exception processing " + statusFile + " : " + str(e))
             raise
         # get the project info hints
         if optionVerbose:
             print("DEBUG: Gather hints from project Status page")
         table = dom.getElementsByTagName("table")[0]
         for row in table.getElementsByTagName("tr")[1:]:
             if (len(row.getElementsByTagName("td")) < 3):
                 continue
             cell = row.getElementsByTagName("td")[2]
             if 'id' in cell.attributes:
                 values = [getText(item.childNodes) for item in cell.childNodes]
                 value = " ".join(values).strip()
                 if value == "":
                     value = getText(cell.childNodes).strip()
                 if optionVerbose:
                     print("DEBUG: Hint: {0}={1}".format(
                         cell.getAttribute('id'), value))
                 if cell.getAttribute('id') == "mail-dev":
                     value = value.replace('  at  ', '@')
                     value = value.replace('  Subscribe  Unsubscribe', '')
                     value = value.replace('  Archive', '')
                     value = value.replace(' ', '@', 1)
                     value = value.replace(' ', '')
                     value = value.replace('@@', '@')
                     matchMail = re.search(mailListRE, value)
                     if matchMail:
                         projects[k][
                             'hintMailListDev'] = "{0}-{1}".format(matchMail.group(2), matchMail.group(1))
                     continue
                 if cell.getAttribute('id') == "mail-commits":
                     value = value.replace('  at  ', '@')
                     value = value.replace('  Subscribe  Unsubscribe', '')
                     value = value.replace('  Archive', '')
                     value = value.replace(' ', '@', 1)
                     value = value.replace(' ', '')
                     value = value.replace('@@', '@')
                     matchMail = re.search(mailListRE, value)
                     if matchMail:
                         projects[k][
                             'hintMailListCommits'] = "{0}-{1}".format(matchMail.group(2), matchMail.group(1))
                     continue
                 # Get hints for various url-based resources
                 matchUrl = re.search(urlHttpRE, value)
                 if not matchUrl:
                     for item in cell.getElementsByTagName('a'):
                         if 'href' in item.attributes:
                             value = item.getAttribute('href')
                             break
                 hasUrl = re.search(urlHttpRE, value)
                 if cell.getAttribute('id') == "svn" and hasUrl:
                     projects[k]['urlSvn'] = value
                     continue
                 if cell.getAttribute('id') == "tracker" and hasUrl:
                     projects[k]['urlTracker'] = value
                     continue
                 if cell.getAttribute('id') == "www" and hasUrl:
                     projects[k]['urlWww'] = value
                     continue
         # Scan the project News section and count new commiters.
         for section in dom.getElementsByTagName("section"):
             if 'id' in section.attributes and section.getAttribute('id') == "News":
                 for line in section.toxml().splitlines():
                     if ('<!--' in line):
                         continue
                     matchNewCommitter = re.search(newCommitterRE, line)
                     if matchNewCommitter:
                         if matchNewCommitter.group(1):
                             projects[k][
                                 'numberCommittersNew'] += int(matchNewCommitter.group(1))
                         else:
                             projects[k]['numberCommittersNew'] += 1
         dom.unlink()
     # end of if status file exists

 # end of processing incubation table data

 # Gather committers data ---

 print("Gather committers data ...")
 # Parse the locally defined groups directly
 committers_projects = {}
 with getUrl(GIT % ASF, encoding='UTF-8') as f:
     for line in f:  # skip the header
         if line.startswith('[groups]'):
             break

     for line in f:  # read the defs section
         line = line.rstrip()
         if re.match(r"^(#|\s*$)", line):  # comment or blanks
             continue
         if re.match(r"^\[/\]", line):  # end of definition section
             break
         m = re.match(r"^\s*(\w\S+?)\s*=\s*(\S+)?$", line)
         if m:
             entry = m.group(1)
             value = m.group(2)
             if value:  # ignore empty groups
                 if value.startswith('{'):
                     continue
                 committers_projects[entry] = value.split(',')
 # pprint.pprint(committers_projects)

 # Gather incubator group mail list data ---

 print("Gather incubator group mail list data ...")


 class IncubatorMailListNamesParser(HTMLParser):

     def __init__(self):
         self.strict = True
         self.names = []
         self.newStyle = []
         self.convert_charrefs = False
         self.reset()

     def handle_starttag(self, tag, attrs):
         # Get the newStyle projects
         if tag == "option":
             for key, value in attrs:
                 if (key == "value" and ".incubator" in value):
                     value = value.replace('.incubator', '')
                     self.newStyle.append(value)

         # Get all Incubator lists
         if tag == "a":
             for key, value in attrs:
                 if (key == "href" and "incubator" in value):
                     value = value.replace('incubator-', '')
                     value = value.replace('/', '')
                     self.names.append(value)
                     break

 mailLists = IncubatorMailListNamesParser()
 mailLists.feed(getUrl(MAIL_LIST_URL).read().decode('utf-8'))
 mailLists.close()
 if optionVerbose:
     pprint.pprint(mailLists.names)
     pprint.pprint(mailLists.newStyle)

 projectMailLists = {}
 mailListNamesRE = re.compile("(.*)-([^-]+)")
 mailListNamesUsualRE = re.compile(
     "announce|commits|cvs|dev|issues|notifications|user|users|spec")
 for listName in mailLists.names:
     if listName in ["announce", "cvs", "general", "projects"]:
         continue
     if optionVerbose:
         print("DEBUG: listName=" + listName)
     if ('-' in listName):
         matchList = re.search(mailListNamesRE, listName)
         try:
             projectMailLists[matchList.group(1)]
         except KeyError:
             projectMailLists[matchList.group(1)] = {}
         listName = listName.replace('/', '')
         projectMailLists[matchList.group(1)][matchList.group(2)] = listName
         if optionVerbose:
             print("DEBUG: Found list: {0} {1}".format(
                 matchList.group(1), matchList.group(2)))
             if (matchList.group(1) not in mailLists.newStyle):
                 print("DEBUG: Uses oldStyle list set-up")
         # FIXME: We assume that mail lists are always named like this
         # with "-dev" or "-commits" etc.
         matchListUsual = re.search(mailListNamesUsualRE, matchList.group(2))
         if optionVerbose and not matchListUsual:
             print("WARN: Unusual mail list name '{0}'".format(listName))
     else:
         listName = listName.replace('/', '')
         try:
             projectMailLists[listName]
         except KeyError:
             projectMailLists[listName] = {}
         projectMailLists[listName]['dev'] = listName
         print("WARN: {0}: unusual mail list name '{1}', assuming it is their dev list".format(
             listName, projectMailLists[listName]['dev']))
 if optionVerbose:
     print("DEBUG: projectMailLists")
     pprint.pprint(projectMailLists)

 # Gather incubator PGP keys data ---

 print("Gather incubator PGP keys data and releases ...")

 keysList = {}
 releases = {}
 releasesBadName = {}
 distareas = {}  # podlings with dist areas

 with osPopen(['svn', 'ls', '-R', 'https://dist.apache.org/repos/dist/release/incubator']) as s:
     for line in s.stdout:
         line = line.rstrip()
         fields = line.split('/')
         podling = fields[0]
         distareas[podling] = True
         file = fields[-1]
         if file:
             if re.search('KEYS(\.txt)?$', file):
                 keysList[
                     podling] = "{0}/{1}".format("http://www.apache.org/dist/incubator", line)
             if re.search('(bz2|gz|zip)\.(asc|sig)$', file, flags=re.IGNORECASE):
                 if re.search('incubat(ing|or)', file, flags=re.IGNORECASE):
                     releases[podling] = True
                 else:
                     releasesBadName[podling] = True

 for k in releases:
     # FIXME: need to handle projects[k]['resourceNames']
     if not k in projects:
         if k in graduatedProjects:
             errorMsg = "{0}: Has graduated, but still has remains on Incubator distribution mirrors".format(
                 k)
             print('ERROR:', errorMsg)
             errorMsg += ". See <a href=\"#h-Graduate\">help</a>."
             otherIssues.append(errorMsg)
             continue
         if k in retiredProjects:
             print(
                 "INFO: {0}: retired project has remains on Incubator mirrors".format(k))

 for k in releasesBadName:
     errorMsg = '{0}: Has a distribution filename missing the word "incubating/incubator"'.format(
         k)
     print('ERROR:', errorMsg)
     errorMsg += ". See <a href=\"#h-hasRelease\">help</a>."
     otherIssues.append(errorMsg)
     if k in graduatedProjects:
         errorMsg = "{0}: Has graduated, but still has remains on Incubator distribution mirrors".format(
             k)
         print('ERROR:', errorMsg)
         errorMsg += ". See <a href=\"#h-Graduate\">help</a>."
         otherIssues.append(errorMsg)

 # Processing the gathered sata ---

 print("Processing ...")
 # Process the reporting schedule data, correlate and ensure each exists in the
 # incubation projects summary table, add more details to the data store.
 projectNames = list(projects.keys())
 for k in sorted(projectNames, key=str.lower):
     print(k)

     statusFile = SITE_CONTENT_DIR + \
         "projects/{0}.xml".format(projects[k]['statusFileName'])
     if not os.path.exists(statusFile):
         errorMsg = "{0}: Missing status file".format(k)
         print('ERROR:', errorMsg)
         errorMsg += ". See <a href=\"#h-hasStatusEntry\">help</a>."
         otherIssues.append(errorMsg)
         projects[k]['hasStatusEntry'] = False
         continue

     startDate = projects[k]['startDate']
     match = re.search(startDateRE, startDate)
     if match:
         if match.group(3) != None:
             entryDateDay = int(match.group(3))
         else:
             entryDateDay = 1
         try:
             entryDate = datetime.datetime(
                 int(match.group(1)), int(match.group(2)), entryDateDay)
         except ValueError:
             print("ERROR: {0}: ValueError with date".format(k))
         else:
             projects[k]['entryDate'] = entryDate

     # Gather recent updates to their status page.
     inputFile = SITE_CONTENT_DIR + \
         "projects/{0}.xml".format(projects[k]['statusFileName'])
     if optionVerbose:
         print("DEBUG: Parsing svn log for {0} ...".format(inputFile))
     outputString = osExec(['svn', 'log', '--xml', inputFile])
     dom = xml.dom.minidom.parseString(outputString)
     rowCounter = 0
     count1 = 0
     count2 = 0
     count3 = 0
     for row in dom.getElementsByTagName("logentry"):
         # Skip counting various commits which were to standardise the status
         # files.
         matchSvnSkip = re.search(
             svnRevisionSkipRE, row.getAttribute('revision'))
         if matchSvnSkip:
             continue
         rowCounter += 1
         date = getText(row.getElementsByTagName("date")[0].childNodes)
         matchSvn = re.search(statusLogRE, date)
         thisDate = datetime.datetime(
             int(matchSvn.group(1)), int(matchSvn.group(2)), int(matchSvn.group(3)))
         if rowCounter == 1:
             projects[k]['statusLastUpdated'] = "{0:4d}-{1:02d}-{2:02d}".format(
                 int(matchSvn.group(1)), int(matchSvn.group(2)), int(matchSvn.group(3)))
         if thisDate >= statusTallyDate1:
             count1 += 1
         if thisDate >= statusTallyDate2:
             count2 += 1
         if thisDate >= statusTallyDate3:
             count3 += 1
     if projects[k]['entryDate'] >= statusTallyDate1:
         count2 = "-"
     if projects[k]['entryDate'] >= statusTallyDate2:
         count3 = "-"
     projects[k]['statusUpdateCounts'] = "{0},{1},{2}".format(
         count1, count2, count3)

     dom.unlink()

 # end of processing

 # Collect SVN directory names ---

 print("Collect SVN directory names")
 incubatorSvnDirs = {}  # top-level SVN incubator dirs
 outputString = osExec(
     ['svn', 'ls', '--xml', 'http://svn.apache.org/repos/asf/incubator/'])
 dom = xml.dom.minidom.parseString(outputString)
 """
 Sample output
 <lists>
   <list path="http://svn.apache.org/repos/asf/incubator">
     <entry kind="file">
     <name>REPO-ORGANISATION.txt</name>
     ...
     </entry>
     <entry kind="dir">
     <name>accumulo</name>
     ...
 """
 for entry in dom.getElementsByTagName("entry"):
     if entry.getAttribute("kind") == 'dir':
         name = entry.getElementsByTagName("name")[0].firstChild.data
         if name not in ('trunk', 'public'):  # skip non-podling entries
             incubatorSvnDirs[
                 "http://svn.apache.org/repos/asf/incubator/{0}/".format(name)] = True

 # Detect certain resources ---

 print("Detect certain resources ...")
 for k in sorted(projectNames, key=str.lower):
     print(k)

     # Add the number of committers
     # Sometimes the committer SVN group name contains the sponsor TLP,
     # e.g. portals-wsrp4j
     svnGroups = projects[k]['resourceNames'][:]
     sponsor = projects[k]['sponsor'].lower()
     if '?' in sponsor:
         sponsor = "incubator"
     if not 'incubator' in sponsor:
         tlpSvn = "{0}-{1}".format(sponsor, k)
         svnGroups.append(tlpSvn)
     for svnGroup in svnGroups:
         if optionVerbose:
             print("DEBUG: Trying committers group '{0}'".format(svnGroup))
         if svnGroup in committers_projects:
             projects[k]['numberCommitters'] = len(
                 committers_projects[svnGroup])
             projects[k]['committersSvn'] = svnGroup
             break
         else:
             continue
     if projects[k]['committersSvn'] == None and optionInfo:
         print("INFO: {0}: Does not yet have committers accounts".format(k))

     # Detect if they have SVN yet.
     # First, try the URL from their status page
     # then, try URLs based on their resourceNames.
     if optionUseClutchState and projects[k]['hasClutchState'] and state[k]['urlSvn']:
         projects[k]['urlSvn'] = state[k]['urlSvn']
         incubatorSvnDirs[normaliseSVNurl(state[k]['urlSvn'])] = 'used'
     else:
         urls = []
         try:
             projects[k]['urlSvn']
         except:
             pass
         else:
             if projects[k]['urlSvn'] != '':
                 urls.append(projects[k]['urlSvn'])
         for name in projects[k]['resourceNames']:
             urls.append(
                 "https://svn.apache.org/repos/asf/incubator/{0}/".format(name))
         for url in urls:
             if optionVerbose:
                 print("DEBUG: Trying SVN URL " + url)
             if normaliseSVNurl(url) in incubatorSvnDirs:
                 projects[k]['urlSvn'] = url
                 incubatorSvnDirs[url] = name  # mark used
                 break
             try:
                 getUrl(url)
             except IOError:
                 projects[k]['urlSvn'] = ''
             else:
                 projects[k]['urlSvn'] = url
                 break
         if not projects[k]['urlSvn'] and optionInfo:
             print("INFO: {0}: Does not yet have SVN".format(k))

     # Detect if they have Tracker yet.
     # First, try the url from their status page
     # then, try a standard url.
     if optionUseClutchState and projects[k]['hasClutchState'] and state[k]['urlTracker']:
         projects[k]['urlTracker'] = state[k]['urlTracker']
     else:
         urlTrackerDefault = "https://issues.apache.org/jira/browse/" + \
             projects[k]['statusFileName'].upper()
         if urlTrackerDefault == projects[k]['urlTracker']:
             urlTrackerDefault = ""
         for url in [projects[k]['urlTracker'], urlTrackerDefault]:
             if url == "":
                 continue
             if optionVerbose:
                 print("DEBUG: Trying Tracker URL: " + url)
             try:
                 getUrl(url)
             except IOError:
                 projects[k]['urlTracker'] = ""
             else:
                 projects[k]['urlTracker'] = url
                 break
         if not projects[k]['urlTracker'] and optionInfo:
             print("INFO: {0}: Does not yet have an Issue Tracker".format(k))

     # Detect if they have a website yet.
     # First, try the url from their status page
     # then, try a standard url.
     if optionUseClutchState and projects[k]['hasClutchState'] and state[k]['urlWww']:
         projects[k]['urlWww'] = state[k]['urlWww']
     else:
         urlWwwDefault = "http://{0}.incubator.apache.org/".format(
             projects[k]['statusFileName'])
         urlWwwDefault2 = "http://incubator.apache.org/{0}/".format(
             projects[k]['statusFileName'])
         if urlWwwDefault == projects[k]['urlWww']:
             urlWwwDefault = ""
         if urlWwwDefault2 == projects[k]['urlWww']:
             urlWwwDefault2 = ""
         for url in [projects[k]['urlWww'], urlWwwDefault, urlWwwDefault2]:
             if url == "":
                 continue
             try:
                 getUrl(url)
             except IOError:
                 projects[k]['urlWww'] = ""
             else:
                 projects[k]['urlWww'] = url
                 break
         if not projects[k]['urlWww'] and optionInfo:
             print("INFO: {0}: Does not yet have a website".format(k))

     # See if they have a distribution area yet.
     if optionUseClutchState and projects[k]['hasClutchState'] and state[k]['urlDist']:
         projects[k]['urlDist'] = state[k]['urlDist']
     else:
         for nameDist in projects[k]['resourceNames']:
             urlDist = "http://www.apache.org/dist/incubator/{0}/".format(
                 nameDist)
             urlMirror = "http://www.apache.org/dyn/closer.cgi/incubator/{0}/".format(
                 nameDist)
             if nameDist in distareas:
                 projects[k]['urlDist'] = urlMirror
                 break
     if not projects[k]['urlDist']:
         if optionInfo:
             print("INFO: {0}: Does not yet have a distribution area".format(k))
     elif optionVerbose:
         print("DEBUG: dist=" + projects[k]['urlDist'])

     # Detect if they have a PGP KEYS file
     if projects[k]['urlDist']:
         match = re.search("/incubator/([^/]+)/", projects[k]['urlDist'])
         if match:
             nameDistArea = match.group(1)
             if nameDistArea in keysList:
                 projects[k]['urlKeys'] = keysList[nameDistArea]
             else:
                 if optionInfo:
                     print(
                         "INFO: {0}: Does not yet have a PGP KEYS file".format(k))
     if optionVerbose:
         print("DEBUG: KEYS=" + projects[k]['urlKeys'])

     # Detect mail lists established:
     # For each alternate resourceName:
     # First, try the list names from their status page
     # then, try a standard list name under incubator.
     # To reduce network queries, if it is an incubator-hosted list then look up in
     # the list of mail-lists already gathered, otherwise it is a TLP-hosted list,
     # so try getting the archives URL.
     foundMailLists = False
     for projectName in projects[k]['resourceNames']:
         for listType in ['dev', 'commits']:
             if listType == "dev":
                 mailListHintKey = "hintMailListDev"
                 mailListKey = "hasMailListDev"
             else:
                 mailListHintKey = "hintMailListCommits"
                 mailListKey = "hasMailListCommits"
             if optionVerbose:
                 print("DEBUG: Looking for mailList: " +
                       projects[k][mailListHintKey])
             matchMail = re.search(mailListNameRE, projects[k][mailListHintKey])
             if matchMail:
                 mailListGroup = matchMail.group(1)
                 mailListNameHint = matchMail.group(2)
             else:
                 mailListGroup = "incubator"
                 mailListNameHint = ""
             if optionVerbose:
                 print("DEBUG: Trying mailListGroup={0} mailListNameHint={1}".format(
                     mailListGroup, mailListNameHint))
             if mailListGroup == "incubator":
                 mailListNameDefault = "{0}-{1}".format(projectName, listType)
                 if mailListNameDefault == mailListNameHint:
                     mailListNameDefault = ""
                 for listName in [mailListNameHint, mailListNameDefault]:
                     if listName == "":
                         continue
                     if optionVerbose:
                         print("DEBUG: Trying listName=" + listName)
                     if not projectName in projectMailLists:
                         if optionVerbose:
                             print("DEBUG: {0}: No incubator group mail lists using '{1}'".format(
                                 k, projectName))
                         break
                     if listType in projectMailLists[projectName]:
                         leader = 'incubator-' if (
                             k not in mailLists.newStyle) else ''
                         projects[k][mailListKey] = MAIL_LIST_URL + \
                             "{0}{1}/".format(leader,
                                              projectMailLists[projectName][listType])
                         if optionVerbose:
                             print("DEBUG: Successful Incubator mail url: " +
                                   projects[k][mailListKey])
                         foundMailLists = True
                         break
                     else:
                         if optionInfo:
                             print("INFO: {0}: Does not yet have hinted incubator mail list '{1}-{2}'".format(
                                 k, projectName, listType))
                         projects[k][mailListKey] = ""
             # End of processing incubator group mail list.
             else:
                 listName = projects[k][mailListHintKey]
                 url = "http://mail-archives.apache.org/mod_mbox/{0}/".format(
                     listName)
                 if optionVerbose:
                     print("DEBUG: Trying mail url: " + url)
                 try:
                     getUrl(url)
                 except IOError:
                     projects[k][mailListKey] = ""
                 else:
                     projects[k][mailListKey] = url
                     if optionVerbose:
                         print("DEBUG: Successful TLP mail url: " + url)
                     foundMailLists = True
         if foundMailLists:
             break
     # End of processing project mail lists.
     if not projects[k]['hasMailListDev'] and optionInfo:
         print("INFO: {0}: Does not yet have 'dev' mail list".format(k))
     if not projects[k]['hasMailListCommits'] and optionInfo:
         print("INFO: {0}: Does not yet have 'commits' mail list".format(k))

 # end of processing each podling to detect resource availability

 if optionInfo:
     for entry in sorted(incubatorSvnDirs):
         if incubatorSvnDirs[entry] == True and entry in graduatedProjects:
             print("INFO: graduated project has SVN directory " + entry)

 # Output data files ---

 print("Output the data ...")
 reportingGroups = {'month': 'Monthly',
                    'group-1': 'January,April,July,October',
                    'group-2': 'February,May,August,November',
                    'group-3': 'March,June,September,December'}
 monthsLong = 'January February March April May June July August September October November December'.split()
 nameCurrentReport = "{0}{1}".format(
     monthsLong[gatherDate.month - 1], gatherDate.year)
 urlCurrentReport = "".join(
     ["http://wiki.apache.org/incubator/", nameCurrentReport])

 fileXmlMY = open(SITE_CONTENT_DIR + 'clutchmy.ent', encoding='utf-8', mode='w')
 fileXmlMY.write(
     '<a href="{0}">{1}</a>\n'.format(urlCurrentReport, nameCurrentReport))
 fileXmlMY.close()

 fileList = open(SITE_CONTENT_DIR + 'clutch.txt', 'w')

 fileXmlo1 = open(SITE_CONTENT_DIR + 'clutcho1.ent', encoding='utf-8', mode='w')
 fileXmlo1.write("<!-- generated by clutch; do not edit -->\n")
 if len(otherIssues):
     otherXml = """<li>other issues <a href="#other">listed</a> below for: """
     otherIssuesRE = re.compile("^([^:]+):.*$")
     otherIssues.sort()
     for issue in otherIssues:
         matchOtherIssues = re.search(otherIssuesRE, issue)
         otherXml += '\n <span class="care">{0}</span> '.format(
             matchOtherIssues.group(1))
     otherXml += "\n</li>\n"
     fileXmlo1.write(otherXml)
 fileXmlo1.close()

 fileXmlt = open(SITE_CONTENT_DIR + 'clutcht.ent', encoding='utf-8', mode='w')
 fileXmlt.write("<!-- generated by clutch; do not edit -->\n")
 tableTopXml = """
         Clutch last gathered: {0} UTC.<br />
         Number of podlings in incubation: {1}
 """.format(gatherDateString, len(projects))
 fileXmlt.write(tableTopXml)
 fileXmlt.close()

 fileList.write('#identifier,name,sponsor\n')
 reportList1 = ""
 reportList2 = ""
 reportList3 = ""
 tableRowCount = 0
 tableRowCountMid = int(len(projects) / 2)
 fileXml = open(SITE_CONTENT_DIR + 'clutchr1.ent', encoding='utf-8', mode='w')
 fileXml.write("<!-- generated by clutch; do not edit -->\n")
 for k in sorted(projectNames, key=str.lower):
     tableRowCount += 1
     if tableRowCount == tableRowCountMid:
         fileXml.close()
         fileXml = open(SITE_CONTENT_DIR + 'clutchr2.ent',
                        encoding='utf-8', mode='w')
         fileXml.write("<!-- generated by clutch; do not edit -->\n")
     fileXml.write('        <tr id="{0}">\n'.format(k))
     fileXml.write('          <td')
     if k in graduatingOrRetiring:
         fileXml.write(' class="grad"')
     fileXml.write('>{0}</td>\n'.format(projects[k]['fullName']))
     persist[k] = {}
     persist[k]['podlingName'] = projects[k]['name']
     persist[k]['fullName'] = projects[k]['fullName']

     if '?' in projects[k]['sponsor']:
         fileXml.write(
             '          <td class="issue">{0}</td>\n'.format(projects[k]['sponsor']))
     else:
         fileXml.write(
             '          <td>{0}</td>\n'.format(projects[k]['sponsor']))
     persist[k]['sponsor'] = projects[k]['sponsor']
     persist[k]['description'] = projects[k]['description']
     persist[k]['mentors'] = projects[k]['mentors']

     fileXml.write('          <td>{0}</td>\n'.format(projects[k]['startDate']))
     persist[k]['startDate'] = projects[k]['startDate']

     # elapsedDays column
     fileXml.write('          <td></td>\n')

     if not projects[k]['reportingMonthly']:
         fileXml.write(
             '          <td>{0}</td>\n'.format(projects[k]['reportingMonthly']))
     else:
         fileXml.write(
             '          <td class="care">{0}</td>\n'.format(projects[k]['reportingMonthly']))
     persist[k]['reportingMonthly'] = projects[k]['reportingMonthly']

     fileXml.write(
         '          <td>{0}</td>\n'.format(projects[k]['reportingGroup']))
     # save the simple group number for programs that have their own ideas.
     persist[k]['rawReportingGroup'] = projects[k]['reportingGroup']
     persist[k]['reportingGroup'] = reportingGroups[
         projects[k]['reportingGroup']]
     reportDevList = '"{0} Developers"'.format(projects[k]['fullName'])
     if projects[k]['hasMailListDev']:
         matchDevMail = re.search(mailListNameUrlRE, projects[
                                  k]['hasMailListDev'])
         if matchDevMail:
             mailListGroup = None
             for alias in projects[k]['resourceNames']:
                 if (alias in mailLists.newStyle):
                     mailListGroup = alias
             if (mailListGroup != None):
                 reportDevList += " <dev@{0}.incubator.apache.org>".format(
                     mailListGroup)
             else:
                 reportDevList += " <{0}@{1}.apache.org>".format(
                     matchDevMail.group(2), matchDevMail.group(1))
         else:
             reportDevList += " <general@incubator.apache.org>"
     else:
         reportDevList += " <general@incubator.apache.org>"
     if optionVerbose:
         print("DEBUG: {0}: reportDevList={1}".format(k, reportDevList))
     reportDevList += "\n"
     if projects[k]['reportingMonthly']:
         reportList1 += reportDevList
         reportList2 += reportDevList
         reportList3 += reportDevList
     else:
         if (projects[k]['reportingGroup'] == "group-1"):
             reportList1 += reportDevList
         elif (projects[k]['reportingGroup'] == "group-2"):
             reportList2 += reportDevList
         elif (projects[k]['reportingGroup'] == "group-3"):
             reportList3 += reportDevList

     if projects[k]['hasReportingGroup']:
         fileXml.write(
             '          <td class="cool1">{0}</td>\n'.format(projects[k]['hasReportingGroup']))
     else:
         fileXml.write(
             '          <td class="issue">{0}</td>\n'.format(projects[k]['hasReportingGroup']))

     if projects[k]['hasStatusEntry']:
         fileXml.write('          <td class="cool1"><a href="projects/{0}.html">{1}</a></td>\n'.format(
             projects[k]['statusFileName'], projects[k]['hasStatusEntry']))
     else:
         fileXml.write(
             '          <td class="issue">{0}</td>\n'.format(projects[k]['hasStatusEntry']))

     fileXml.write(
         '          <td>{0}</td>\n'.format(projects[k]['statusLastUpdated']))

     # statusAge column
     fileXml.write('          <td></td>\n')

     fileXml.write(
         '          <td>{0}</td>\n'.format(projects[k]['statusUpdateCounts']))

     if projects[k]['numberCommitters'] > 0:
         if projects[k]['numberCommitters'] > 2:
             fileXml.write('          <td class="cool1 number"><a href="http://people.apache.org/committers-by-project.html#{0}">{1}</a></td>\n'.format(
                 projects[k]['committersSvn'], projects[k]['numberCommitters']))
         else:
             fileXml.write('          <td class="care number"><a href="http://people.apache.org/committers-by-project.html#{0}">{1}</a></td>\n'.format(
                 projects[k]['committersSvn'], projects[k]['numberCommitters']))
     else:
         fileXml.write('          <td class="care">-</td>\n')

     if projects[k]['numberCommittersNew'] > 0:
         if projects[k]['numberCommittersNew'] > 1:
             fileXml.write(
                 '          <td class="cool1 number">{0}</td>\n'.format(projects[k]['numberCommittersNew']))
         else:
             fileXml.write(
                 '          <td class="cool2 number">{0}</td>\n'.format(projects[k]['numberCommittersNew']))
     else:
         fileXml.write('          <td class="care number">0</td>\n')

     if projects[k]['urlSvn']:
         fileXml.write(
             '          <td class="cool1"><a href="{0}">True</a></td>\n'.format(projects[k]['urlSvn']))
     else:
         fileXml.write('          <td class="care">False</td>\n')
     persist[k]['urlSvn'] = projects[k]['urlSvn']

     if projects[k]['urlTracker']:
         fileXml.write(
             '          <td class="cool1"><a href="{0}">True</a></td>\n'.format(projects[k]['urlTracker']))
     else:
         fileXml.write('          <td class="care">False</td>\n')
     persist[k]['urlTracker'] = projects[k]['urlTracker']

     hasUrl = re.search(urlHttpRE, projects[k]['hasMailListDev'])
     if hasUrl:
         fileXml.write(
             '          <td class="cool1"><a href="{0}">True</a></td>\n'.format(projects[k]['hasMailListDev']))
     else:
         fileXml.write('          <td class="care">False</td>\n')
     persist[k]['hasMailListDev'] = projects[k]['hasMailListDev']

     hasUrl = re.search(urlHttpRE, projects[k]['hasMailListCommits'])
     if hasUrl:
         fileXml.write('          <td class="cool1"><a href="{0}">True</a></td>\n'.format(
             projects[k]['hasMailListCommits']))
     else:
         fileXml.write('          <td class="care">False</td>\n')
     persist[k]['hasMailListCommits'] = projects[k]['hasMailListCommits']

     if projects[k]['urlWww']:
         fileXml.write(
             '          <td class="cool1"><a href="{0}">True</a></td>\n'.format(projects[k]['urlWww']))
     else:
         fileXml.write('          <td class="care">False</td>\n')
     persist[k]['urlWww'] = projects[k]['urlWww']

     if projects[k]['urlDist']:
         fileXml.write(
             '          <td class="cool1"><a href="{0}">True</a></td>\n'.format(projects[k]['urlDist']))
     else:
         fileXml.write('          <td class="care">False</td>\n')
     persist[k]['urlDist'] = projects[k]['urlDist']

     if projects[k]['urlKeys']:
         fileXml.write(
             '          <td class="cool1"><a href="{0}">True</a></td>\n'.format(projects[k]['urlKeys']))
     else:
         fileXml.write('          <td class="care">False</td>\n')

     match = re.search(distMirrorRE, projects[k]['urlDist'])
     if match:
         if match.group(1) in releases:
             fileXml.write(
                 '          <td class="cool1"><a href="{0}">True</a></td>\n'.format(projects[k]['urlDist']))
         else:
             fileXml.write('          <td class="care">False</td>\n')
     else:
         fileXml.write('          <td class="care">False</td>\n')

     fileXml.write('        </tr>\n')

     fileList.write('{0},"{1}","{2}"\n'.format(
         k, projects[k]['name'], projects[k]['sponsor']))

 fileXml.close()
 # End of rows

 # Other issues
 fileXmlo2 = open(SITE_CONTENT_DIR + 'clutcho2.ent', encoding='utf-8', mode='w')
 fileXmlo2.write("<!-- generated by clutch; do not edit -->\n")
 if len(otherIssues):
     otherIssues.sort()
     for issue in otherIssues:
         fileXmlo2.write("        <li>{0}</li>\n".format(issue))
 else:
     fileXmlo2.write("        <li>No known issues.</li>\n")
 fileXmlo2.close()


 mentors = list(mentorsProjects.keys())
 mentors.sort()
 fileXmlm = open(SITE_CONTENT_DIR + 'clutchm.ent', encoding='utf-8', mode='w')
 fileXmlm.write("<!-- generated by clutch; do not edit -->\n")
 for mentor in mentors:
     fileXmlm.write("        <li><strong>{0}</strong>: {1}</li>\n".format(
         mentor, ', '.join(mentorsProjects[mentor])))
 fileXmlm.close()

 fileList.close()

 fileReport1 = open(SITE_CONTENT_DIR + 'report_due_1.txt', 'w')
 fileReport1.write(reportList1)
 fileReport1.close()
 fileReport2 = open(SITE_CONTENT_DIR + 'report_due_2.txt', 'w')
 fileReport2.write(reportList2)
 fileReport2.close()
 fileReport3 = open(SITE_CONTENT_DIR + 'report_due_3.txt', 'w')
 fileReport3.write(reportList3)
 fileReport3.close()

 # Create the persistent data file.
 outputFile = open('clutch.pkl', 'wb')
 pickle.dump(persist, outputFile, protocol=3)
 outputFile.close()

 print("Done. Generated clutch*.ent files.")
 print("Now you need to re-build the site, as usual.")