# blob: 73b531d8f2483e58179ffb3b091e4fb8578cc972 [file] [log] [blame]
# @@@ START COPYRIGHT @@@
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# @@@ END COPYRIGHT @@@
#
# This script checks for files that have been changed in a given
# workspace. It then checks that the copyrights in the file end
# in the current year. If they don't, it returns an error or
# optionally updates them.
#
# If the environment variable UPDATE_COPYRIGHTS is set to YES,
# or if the --update option is specified, then the script
# updates copyrights that need updating. No error is
# returned in that case (unless some other more severe error is
# found). If the environment variable is absent or set to something
# other than YES, and the --update option is not specified, then
# an out-of-date copyright causes an error.
#
import os
import sys
import re
import datetime
import subprocess
# import argparse requires Python 2.7 unfortunately
class LineParser:
    """Pick changed file names out of long-format 'git status' output.

    Feed the output to parseLine() one line at a time.  Lines that
    report a new or modified file yield the file name; every other
    line yields None.

    Older versions of git prefix every line of the status report with
    '#'; git 1.8.5 and later omit that prefix unless
    status.displayCommentPrefix is set.  Both layouts are accepted.
    """

    # Section headings (without any '# ' prefix) and the parse state
    # each one starts.  "Changes not staged for commit:" is the wording
    # newer git versions use for "Changed but not updated:".
    _SECTION_STATES = {
        "Changes to be committed:\n": "TO_BE_COMMITTED",
        "Changed but not updated:\n": "CHANGED_NOT_UPDATED",
        "Changes not staged for commit:\n": "CHANGED_NOT_UPDATED",
        "Untracked files:\n": "UNTRACKED",
    }

    def __init__(self):
        # require that the entire string match the pattern
        # (note the $ at the end of the pattern); the leading '#' is
        # optional so that output without the comment prefix matches too
        pattern = r'#?[\s]+(?P<status>(new file:|modified:))' + \
                  r'[\s]+(?P<filename>[\S]*)\n$'
        self.lineParser = re.compile(pattern)
        # the parseState is not really used now, but it might
        # be useful for debugging
        self.parseState = "INIT"

    def parseLine(self, line):
        """Parse a line from a 'git status' command.

        line is one line of output including its trailing newline.

        Returns the file name if the line reports a new or modified
        file, otherwise None.  A section heading updates
        self.parseState and returns None.
        """
        # drop the optional comment prefix before looking up a heading
        if line.startswith("# "):
            heading = line[2:]
        else:
            heading = line
        state = self._SECTION_STATES.get(heading)
        if state is not None:
            self.parseState = state
            return None

        m = self.lineParser.match(line)
        if m:
            return m.group('filename')
        return None
# beginning of main
# process command line arguments
# Note: It's easiest to use the argparse module, but unfortunately
# it is only installed as part of Python with 2.7, and many
# workstations are on 2.6. I've left the original argparse code
# here, commented out, so we can use it in the future as
# Python 2.7 and later versions become more common.
#parser = argparse.ArgumentParser(
# description='This script checks for out-of-date copyrights.',
# epilog='''If the environment variable UPDATE_COPYRIGHTS=YES is present,
# the script behaves as if --update was specified.''')
#
# --update takes no arguments
#line = 'If specified, out-of-date copyrights are automatically updated.'
#parser.add_argument('--update', action='store_true', help=line)
#
# --directory takes one argument, the directory name
#parser.add_argument('--directory',
# help='Defaults to the present working directory.')
#
#args = parser.parse_args() # exits and prints help if args are incorrect
# the code below does the equivalent of the argparse code above
class Args:
    """Holds the parsed options, like the namespace argparse returns."""

    def __init__(self):
        self.directory = ""   # directory to switch to; "" means stay put
        self.update = False   # True when --update was given


args = Args()
doUsage = False             # print the usage text and exit
badArguments = False        # usage was caused by an error, not -h/--help
expectingDirectory = False  # previous argument was --directory

# sys.argv[0] is the script name, so start with the second element
for arg in sys.argv[1:]:
    if expectingDirectory:
        # make sure the argument is not empty and doesn't begin with
        # minus since we are expecting a directory name
        # (startswith is safe on an empty string; arg[0] is not)
        if not arg or arg.startswith("-"):
            doUsage = True
            badArguments = True
        else:
            args.directory = arg
        expectingDirectory = False
    elif arg == "--update":
        args.update = True
    elif arg == "--directory":
        expectingDirectory = True
    else:  # -h, --help, or invalid argument
        doUsage = True
        if arg not in ("-h", "--help"):
            badArguments = True

if expectingDirectory:  # if --directory at end lacks argument
    doUsage = True
    badArguments = True

if doUsage:
    print("usage: updateCopyrightCheck.py [-h] [--update] " +
          "[--directory DIRECTORY]")
    print("")
    print("This script checks for out-of-date copyrights.")
    print("")
    print("optional arguments:")
    print(" -h, --help show this help message and exit")
    print(" --update If specified, out-of-date " +
          "copyrights are automatically")
    print(" updated.")
    print(" --directory DIRECTORY Defaults to the present working directory.")
    print("")
    print("If the environment variable UPDATE_COPYRIGHTS=YES " +
          "is present, the script")
    print("behaves as if --update was specified.")
    print("")
    # like argparse: asking for help succeeds, a bad command line
    # exits with status 2 so calling scripts can detect the mistake
    if badArguments:
        sys.exit(2)
    sys.exit(0)
# end of argparse replacement code
directory = args.directory
if directory:  # if a directory was specified, switch to it
    os.chdir(directory)

# updating is enabled by --update on the command line or by
# UPDATE_COPYRIGHTS=YES in the environment
update = args.update or os.environ.get('UPDATE_COPYRIGHTS') == 'YES'

# initialize counters
filesLackingCopyright = 0
filesCorrectCopyright = 0
filesIncorrectCopyright = 0
filesUpdatedCopyright = 0
filesMultipleCopyright = 0
filesFailedUpdate = 0

# get the current year and set up pattern matching strings
currentYear = str(datetime.datetime.now().year)
matchString = r'\(C\) Copyright [0-9\-]*' + currentYear + \
    r' Hewlett-Packard Development Company'
reCheckYear = re.compile(matchString)
matchString2 = r'\(C\) Copyright (?P<startYear>([0-9]+\-)?)' + \
    r'(?P<oldYear>[0-9]+) Hewlett-Packard Development Company'
reCaptureOldYear = re.compile(matchString2)

# The copyright line of interest looks like:
#
# // (x) <xxxxxxxxx> 1998-2014 Hewlett-Packard Development Company, L.P.
#
# (It is actually (C), not (x) and Copyright not <xxxxxxxxx>.
# I obfuscated the comment so this
# script wouldn't mistakenly report itself as having multiple
# copyrights and to pass copyright checks from Apache :-)
#
# Of course the comment delimiter will vary with the file language.
#
# This is the basic regular expression handed to both grep and ed to
# locate that line.  It is kept split over two source lines, as the
# original literals were.
copyrightPattern = r'(C) Copyright [0-9\-]*' \
    + r' Hewlett-Packard Development Company'


def _captureLines(cmd):
    """Run cmd to completion and return the lines it wrote to stdout.

    The pipe is closed and the child is waited for, so no unreaped
    process or open descriptor is left behind for each file examined.
    """
    process = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    try:
        return list(process.stdout)
    finally:
        process.stdout.close()
        process.wait()


def _buildSubstituteCommand(copyrightLine):
    """Return the ed command that brings copyrightLine up to date.

    Returns None when the year(s) cannot be recognized in the line.
    """
    m2 = reCaptureOldYear.search(copyrightLine)
    if not m2:
        return None
    oldYear = m2.group('oldYear')
    if m2.group('startYear'):
        # it's yyyy-zzzz, just replace zzzz with the current year
        newYears = currentYear
    else:
        # it's just zzzz, replace with zzzz-<current year>
        newYears = oldYear + '-' + currentYear
    return '/' + copyrightPattern + '/' \
        + 's/' + oldYear + '/' + newYears + '/g\n'


def _runEd(fileName, substituteCommand):
    """Apply substituteCommand to fileName with ed and save the file.

    Waits for ed to finish and returns True only if it exited with
    status 0; a non-zero status is treated as a failed update.
    """
    edProcess = subprocess.Popen(['ed', '-s', fileName],
                                 stdin=subprocess.PIPE)
    # communicate() writes the commands, closes stdin and waits
    edProcess.communicate(substituteCommand + 'w\n')
    return edProcess.returncode == 0


# get the set of files changed
statusLines = _captureLines(['git', 'status'])

# for each file
# read it
# if it lacks copyright, mention that
# else if the copyright is wrong
# update the file to replace the copyright
# mention that we did so
print("")
lineParser = LineParser()
for line in statusLines:
    parseResult = lineParser.parseLine(line)
    if not parseResult:
        continue

    copyrightLines = _captureLines(['grep', copyrightPattern, parseResult])
    matchCount = len(copyrightLines)
    # the file is up to date if any copyright line ends in this year
    needsEdit = not any(reCheckYear.search(copyrightLine)
                        for copyrightLine in copyrightLines)

    if matchCount == 0:
        filesLackingCopyright = filesLackingCopyright + 1
        print('*** File ' + parseResult +
              ' lacks a copyright; please add one.')
    elif matchCount > 1:
        filesMultipleCopyright = filesMultipleCopyright + 1
        print('*** File ' + parseResult +
              ' appears to have multiple copyrights; please check manually.')
    elif needsEdit:
        # exactly one copyright line, as expected, but old copyright
        filesIncorrectCopyright = filesIncorrectCopyright + 1
        if update:  # try to update it if --update option was specified
            print('*** Updating file '
                  + parseResult + ' to current copyright.')
            substituteCommand = _buildSubstituteCommand(copyrightLines[0])
            if substituteCommand and _runEd(parseResult, substituteCommand):
                # the file is no longer incorrect, so move it from
                # the incorrect count to the updated count
                filesUpdatedCopyright = filesUpdatedCopyright + 1
                filesIncorrectCopyright = filesIncorrectCopyright - 1
                print('*** Update completed.')
            else:
                filesFailedUpdate = filesFailedUpdate + 1
                print('*** Update failed; please check manually.')
        else:  # --update not specified
            print('*** File ' + parseResult +
                  ' has an incorrect copyright; please update.')
    else:
        print('File ' + parseResult + ' has the correct copyright.')
        filesCorrectCopyright = filesCorrectCopyright + 1

# print out a count of files having correct copyrights,
# the number updated, and the number missing copyrights etc.
exitCode = 0
print("")
print("Number of files lacking copyright: " + str(filesLackingCopyright))
print("Number of files with correct copyright: " + str(filesCorrectCopyright))
print("Number of files with updated copyright: " + str(filesUpdatedCopyright))
if filesIncorrectCopyright > 0:
    print("Number of files with incorrect copyrights: " +
          str(filesIncorrectCopyright))
    exitCode = 1
if filesFailedUpdate > 0:
    print("Number of files where copyright update failed: " +
          str(filesFailedUpdate))
    exitCode = 1
if filesMultipleCopyright > 0:
    print("Number of files with multiple copyrights: " +
          str(filesMultipleCopyright))
    exitCode = 1
print("")
sys.exit(exitCode)