blob: e7f55e34c95953b2b6f7df7bd9a5cd2df9a9bc1f [file] [log] [blame]
#! /usr/bin/python
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This file is used to generate the BuildStamp.java class that
# records the user, url, revision and timestamp.
import os
import sys
import re
import subprocess
import getpass
import time
import socket
import hashlib
def usage():
print >>sys.stderr, "Usage: saveVersion.py <version> <build_directory>"
print >>sys.stderr, "Eg: saveVersion.py 1.1.0 build"
# This template is what gets written to build/src/o.a.h/BuildStamp.java
# It is filled out by (version, revision, user, date, url, srcChecksum)
template_string = \
'''/*
* Generated by src/saveVersion.py
*/
@HadoopVersionAnnotation(version="%s", revision="%s",
user="%s", date="%s", url="%s",
srcChecksum="%s")
package org.apache.hadoop;
'''
# Convert Windows paths with back-slashes into canonical Unix paths
# with forward slashes. Unix paths are unchanged. Mixed formats
# just naively have back-slashes converted to forward slashes.
def canonicalpath(path):
return re.sub('\\\\','/',path)
# This is an implementation of md5sum-like functionality in pure python.
# The path name is output in canonical form to get the same aggregate
# checksum as Linux.
def md5sum(fname):
block_size = 2**20
f = open(os.path.normpath(fname), 'rb') # open in binary mode to make sure EOLs don't get munged
md5 = hashlib.md5()
while True:
data = f.read(block_size)
if not data:
break
md5.update(data)
f.close()
return "%s %s\n" % (md5.hexdigest(), canonicalpath(fname))
# Roughly emulate the Python 2.7 functionality of "subprocess.check_output()"
# by accepting a CLI vector [cmd, arg1, arg2, ...], and returning its stdout result,
# but without the "checking". For use in lower version Python interpreters.
def subprocessOutput(args):
# result = subprocess.check_output(args, shell=True) # requires py2.7
process = subprocess.Popen(args, stdout=subprocess.PIPE)
result = process.communicate()[0].strip()
return result
# Strip surrounding quotes from a string object that carried its quotes with it.
def stripquotes(s):
if (s[0] == '"' and s[-1] == '"') or (s[0] == "'" and s[-1] == "'") :
return s[1:-1]
elif (s[0] == '\\' and s[-1] == '"') :
return s[1:-1]
else:
return s
def main(argv=None):
if argv is None:
argv = sys.argv[1:3]
if (len(argv) != 2) or (argv[0] == "--help") or (argv[0] == "-h"):
usage()
return -1
version = argv[0]
build_dir = argv[1]
user = getpass.getuser()
date = time.strftime("%a %b %d %H:%M:%S %Z %Y") # simulate `date`
if os.path.isdir(".git"):
revision = stripquotes(subprocessOutput(['git', 'log', '-1', '--pretty=format:"%H"']))
origin = subprocessOutput(['git', 'config', '--get', 'remote.origin.url'])
branch = subprocessOutput(['git', 'branch'])
filter_current_branch = re.compile(r'^\* (.*)$', re.MULTILINE)
current_branch = filter_current_branch.search(branch).group(1).strip()
url = "%s on branch %s" % (origin, current_branch)
else:
svn_info = subprocessOutput(['svn', 'info'])
filter_last_revision = re.compile(r'^Last Changed Rev: (.*)$', re.MULTILINE)
revision = filter_last_revision.search(svn_info).group(1).strip()
filter_url = re.compile(r'^URL: (.*)$', re.MULTILINE)
url = filter_url.search(svn_info).group(1).strip()
filter_java = re.compile(r'.+\.java$')
file_list = []
for root, dirs, files in os.walk("src"):
for name in files:
if filter_java.match(name):
file_list.append(canonicalpath(os.path.join(root, name)))
# Sorting is done on unix-format names, case-folded, in order to get a platform-independent sort.
file_list.sort(key=str.upper)
file_count = len(file_list)
hash = hashlib.md5()
for name in file_list:
file_hash_string = md5sum(name)
hash.update(file_hash_string)
srcChecksum = hash.hexdigest()
target_dir = os.path.join(os.path.normpath(build_dir),
os.path.normpath('src/org/apache/hadoop'))
if not os.path.exists(target_dir):
os.makedirs(target_dir)
target_file = os.path.join(target_dir, 'package-info.java')
fout = open(target_file, "w")
fout.write(template_string % (version, revision, user, date, url, srcChecksum))
fout.close()
print("Checksummed %s src/**.java files" % file_count)
return 0
##########################
if __name__ == "__main__":
sys.exit(main())