blob: 19e602cf4be05045c2099a67f29eaf36c376d15e [file] [log] [blame]
#!/usr/bin/env python
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import dependency_check.version_comparer as version_comparer
import logging
import os.path
import re
import requests
import sys
import traceback
from datetime import datetime
from dependency_check.bigquery_client_utils import BigQueryClientUtils
from dependency_check.report_generator_config import ReportGeneratorConfig
from jira_utils.jira_manager import JiraManager
from requests.adapters import HTTPAdapter
from requests.packages.urllib3.util.retry import Retry
# Lower the root logger's threshold to INFO so the progress messages logged
# by this script via logging.info() are not filtered out.
logging.getLogger().setLevel(logging.INFO)
class InvalidFormatError(Exception):
    """Signals that a dependency record does not have the expected layout."""

    def __init__(self, message):
        # Hand the message to the base class so it is available through
        # str(error) and error.args.
        Exception.__init__(self, message)
def extract_results(file_path):
    """
    Extract the Java/Python dependency reports and return a collection of out-of-date dependencies.

    Every line that follows the first line starting with
    'The following dependencies have later ' is returned verbatim (trailing
    newline included). The marker line itself is not part of the result.

    Args:
      file_path: the path of the raw reports
    Return:
      outdated_deps: a list with the raw report lines found after the marker
        line; empty when the marker never appears.
    Raises:
      IOError/OSError: when the report cannot be opened or read.
    """
    outdated_deps = []
    # The `with` block closes the file on success and on error, so no explicit
    # close() (or re-raising except clause) is needed.
    with open(file_path) as raw_report:
        seen_outdated_header = False
        for line in raw_report:
            if seen_outdated_header:
                outdated_deps.append(line)
            elif line.startswith('The following dependencies have later '):
                seen_outdated_header = True
    return outdated_deps
def extract_single_dep(dep):
    """
    Extract a single dependency check record from Java and Python reports.

    Args:
      dep: e.g " - org.assertj:assertj-core [2.5.0 -> 3.10.0]".
    Return:
      A 3-tuple of whitespace-stripped strings:
      dependency name, current version, latest version.
    Raises:
      InvalidFormatError: when `dep` does not start with the
        " - name [current -> latest]" layout.
    """
    # Raw string so that \s, \S, \[ and \] reach the regex engine untouched
    # instead of being treated as (invalid) Python string escapes.
    pattern = r" - ([\s\S]*)\[([\s\S]*) -> ([\s\S]*)\]"
    match = re.match(pattern, dep)
    if match is None:
        raise InvalidFormatError("Failed to extract the dependency information: {}".format(dep))
    dep_name, curr_ver, latest_ver = (group.strip() for group in match.groups())
    return dep_name, curr_ver, latest_ver
def prioritize_dependencies(deps, sdk_type):
    """
    Extracts and analyze dependency versions and release dates.
    Returns a collection of dependencies which is "high priority" in html format.

    A dependency is considered when at least one of these holds:
      1. version_comparer.compare_dependency_versions(curr_ver, latest_ver) is
         truthy (per the original notes: a major release, e.g
         org.assertj:assertj-core [2.5.0 -> 3.10.0], or 3 sub-versions behind,
         e.g org.tukaani:xz [1.5 -> 1.8]).
      2. compare_dependency_release_dates(curr_release_date, latest_release_date)
         is truthy.
    It is only added to the result when jira_manager.run(...) returns an issue
    whose status is 'Open', 'Reopened' or 'Triage Needed'.

    A record that raises while being processed is reported with
    traceback.print_exc() and skipped; the remaining records are still handled.
    After the loop, bigquery_client.clean_stale_records_from_table() is called.

    Args:
      deps: A collection of outdated dependencies (raw report lines).
      sdk_type: 'Java' selects the Maven Central lookups; any other value uses
        the PyPI URL and the Python compatibility checking service.
    Return:
      high_priority_deps: A collection of dependencies which need to be taken care of before next release,
        each one an html table row string.
    """
    project_id = ReportGeneratorConfig.GCLOUD_PROJECT_ID
    dataset_id = ReportGeneratorConfig.DATASET_ID
    table_id = ReportGeneratorConfig.get_bigquery_table_id(sdk_type)
    high_priority_deps = []
    bigquery_client = BigQueryClientUtils(project_id, dataset_id, table_id)
    jira_manager = JiraManager(ReportGeneratorConfig.BEAM_JIRA_HOST,
                               ReportGeneratorConfig.BEAM_JIRA_BOT_USRENAME,
                               ReportGeneratorConfig.BEAM_JIRA_BOT_PASSWORD,
                               ReportGeneratorConfig.get_owners_file(sdk_type))

    for dep in deps:
        try:
            if re.match(r'https?://', dep.lstrip()):
                # Gradle-version-plugin's output contains URLs of the libraries
                continue
            logging.info("\n\nStart processing: " + dep)
            dep_name, curr_ver, latest_ver = extract_single_dep(dep)
            curr_release_date = None
            latest_release_date = None
            group_id = None

            if sdk_type == 'Java':
                # extract the groupid and artifactid
                group_id, artifact_id = dep_name.split(":")
                dep_details_url = "{0}/{1}/{2}".format(ReportGeneratorConfig.MAVEN_CENTRAL_URL, group_id, artifact_id)
                curr_release_date = find_release_time_from_maven_central(group_id, artifact_id, curr_ver)
                latest_release_date = find_release_time_from_maven_central(group_id, artifact_id, latest_ver)
            else:
                dep_details_url = ReportGeneratorConfig.PYPI_URL + dep_name
                curr_release_date = find_release_time_from_python_compatibility_checking_service(dep_name, curr_ver)
                latest_release_date = find_release_time_from_python_compatibility_checking_service(dep_name, latest_ver)

            # Fall back to the BigQuery table when either lookup came back empty.
            if not curr_release_date or not latest_release_date:
                curr_release_date, latest_release_date = query_dependency_release_dates_from_bigquery(bigquery_client,
                                                                                                      dep_name,
                                                                                                      curr_ver,
                                                                                                      latest_ver)
            dep_info = """<tr>
<td><a href=\'{0}\'>{1}</a></td>
<td>{2}</td>
<td>{3}</td>
<td>{4}</td>
<td>{5}</td>""".format(dep_details_url,
                       dep_name,
                       curr_ver,
                       latest_ver,
                       curr_release_date,
                       latest_release_date)
            if (version_comparer.compare_dependency_versions(curr_ver, latest_ver) or
                    compare_dependency_release_dates(curr_release_date, latest_release_date)):
                # Create a new issue or update on the existing issue
                jira_issue = jira_manager.run(dep_name, curr_ver, latest_ver, sdk_type, group_id=group_id)
                if (jira_issue and jira_issue.fields.status.name in ['Open', 'Reopened', 'Triage Needed']):
                    dep_info += "<td><a href=\'{0}\'>{1}</a></td></tr>".format(
                        ReportGeneratorConfig.BEAM_JIRA_HOST+"browse/"+ jira_issue.key,
                        jira_issue.key)
                    high_priority_deps.append(dep_info)
        except Exception:
            # Best effort: one malformed or failing record must not abort the
            # whole report. `Exception` (rather than a bare except) lets
            # KeyboardInterrupt and SystemExit stop the run.
            traceback.print_exc()

    bigquery_client.clean_stale_records_from_table()
    return high_priority_deps
def find_release_time_from_maven_central(group_id, artifact_id, version):
    """
    Find release dates from Maven Central REST API.

    Args:
      group_id: Maven group id used in the `g:` search term.
      artifact_id: Maven artifact id used in the `a:` search term.
      version: artifact version used in the `v:` search term.
    Return:
      release date (a datetime.date built from the first matching document's
      millisecond `timestamp`), or None when the request fails, the status is
      not ok, no document matches, or the response cannot be parsed.
    """
    url = "http://search.maven.org/solrsearch/select?q=g:{0}+AND+a:{1}+AND+v:{2}".format(
        group_id,
        artifact_id,
        version
    )
    logging.info('Finding release date of {0}:{1} {2} from the Maven Central'.format(
        group_id,
        artifact_id,
        version
    ))
    try:
        # Close the session once the (fully read) response is in hand so its
        # connection pool is released; bound the wait so a stalled server
        # cannot hang the report forever.
        with request_session_with_retries() as session:
            response = session.get(url, timeout=30)
        if not response.ok:
            logging.error("Failed finding the release date of {0}:{1} {2}.\n"
                          "The response status code is not ok: {3}".format(group_id,
                                                                           artifact_id,
                                                                           version,
                                                                           str(response.status_code)))
            return None
        response_data = response.json()
        docs = response_data['response']['docs']
        if not docs:
            logging.error("Errors while extracting the release date: no search result for {0}:{1} {2}".format(
                group_id,
                artifact_id,
                version))
            return None
        release_timestamp_mills = docs[0]['timestamp']
        # NOTE: fromtimestamp() converts using the machine's local timezone.
        release_date = datetime.fromtimestamp(release_timestamp_mills/1000).date()
        return release_date
    except Exception as e:
        # Best effort lookup: any failure is logged and reported as "unknown".
        logging.error("Errors while extracting the release date: " + str(e))
        return None
def find_release_time_from_python_compatibility_checking_service(dep_name, version):
    """
    Query release dates by using Python compatibility checking service.

    Args:
      dep_name: package name, sent as `package={dep_name}=={version}`.
      version: package version.
    Return:
      release date (a datetime.date parsed from
      response['dependency_info'][dep_name]['installed_version_time'] with the
      '%Y-%m-%dT%H:%M:%S' format), or None when the request fails, the status
      is not ok, or the response cannot be parsed.
    """
    url = 'http://104.197.8.72/?package={0}=={1}&python-version=2'.format(
        dep_name,
        version
    )
    logging.info('Finding release time of {0} {1} from the python compatibility checking service.'.format(
        dep_name,
        version
    ))
    try:
        # Close the session once the (fully read) response is in hand so its
        # connection pool is released; bound the wait so a stalled server
        # cannot hang the report forever.
        with request_session_with_retries() as session:
            response = session.get(url, timeout=30)
        if not response.ok:
            logging.error("Failed finding the release date of {0} {1}.\n"
                          "The response status code is not ok: {2}".format(dep_name,
                                                                           version,
                                                                           str(response.status_code)))
            return None
        response_data = response.json()
        release_datetime = response_data['dependency_info'][dep_name]['installed_version_time']
        release_date = datetime.strptime(release_datetime, '%Y-%m-%dT%H:%M:%S').date()
        return release_date
    except Exception as e:
        # Best effort lookup: any failure is logged and reported as "unknown".
        logging.error("Errors while extracting the release date: " + str(e))
        return None
def request_session_with_retries():
    """
    Create an http session with retries.

    Return:
      a requests.Session whose 'http://' and 'https://' prefixes are each
      served by an HTTPAdapter configured with Retry(total=3).
    """
    retry_policy = Retry(total=3)
    http_session = requests.Session()
    # One adapter per scheme, both driven by the same retry policy.
    for scheme in ('http://', 'https://'):
        http_session.mount(scheme, HTTPAdapter(max_retries=retry_policy))
    return http_session
def query_dependency_release_dates_from_bigquery(bigquery_client, dep_name, curr_ver_in_beam, latest_ver):
    """
    Query release dates of current version and the latest version from BQ tables.

    Besides reading, this keeps the table in sync:
      * When the record for `curr_ver_in_beam` is not flagged as currently
        used, the version the table lists as currently used (if any) is
        rewritten with is_currently_used=False, and `curr_ver_in_beam` is
        (re)inserted with is_currently_used=True, using today's date when no
        release date is known.
      * When `latest_ver` has no release date, it is inserted with today's date.

    Args:
      bigquery_client: a bq client object that bundle configurations for API requests
      dep_name: dependency name
      curr_ver_in_beam: the current version used in beam
      latest_ver: the later version
    Return:
      A tuple that contains `curr_release_date` and `latest_release_date`.
      `latest_release_date` falls back to today's date when unknown, whereas
      `curr_release_date` is returned as queried and may therefore be None.
    Raises:
      Whatever the bigquery client raises; errors are not handled here.
    """
    curr_release_date, is_currently_used_bool = bigquery_client.query_dep_info_by_version(dep_name, curr_ver_in_beam)
    latest_release_date, _ = bigquery_client.query_dep_info_by_version(dep_name, latest_ver)
    date_today = datetime.today().date()

    # sync to the bigquery table on the dependency status of the currently used version.
    if not is_currently_used_bool:
        currently_used_version_in_db, currently_used_release_date_in_db = bigquery_client.query_currently_used_dep_info_in_db(dep_name)
        if currently_used_version_in_db is not None:
            # Demote the previously used version: rewrite its row with the flag cleared.
            bigquery_client.delete_dep_from_table(dep_name, currently_used_version_in_db)
            bigquery_client.insert_dep_to_table(dep_name, currently_used_version_in_db, currently_used_release_date_in_db, is_currently_used=False)
        if curr_release_date is None:
            bigquery_client.insert_dep_to_table(dep_name, curr_ver_in_beam, date_today, is_currently_used=True)
        else:
            # Known version: rewrite its row with the flag set.
            bigquery_client.delete_dep_from_table(dep_name, curr_ver_in_beam)
            bigquery_client.insert_dep_to_table(dep_name, curr_ver_in_beam, curr_release_date, is_currently_used=True)

    # sync to the bigquery table on the dependency status of the latest version.
    if latest_release_date is None:
        bigquery_client.insert_dep_to_table(dep_name, latest_ver, date_today, is_currently_used=False)
        latest_release_date = date_today
    return curr_release_date, latest_release_date
def compare_dependency_release_dates(curr_release_date, latest_release_date):
    """
    Compare release dates of current using version and the latest version.

    Args:
      curr_release_date: release date of the version currently in use
      latest_release_date: release date of the latest version
    Return:
      boolean: True when both dates are set and the latest release is at least
      ReportGeneratorConfig.MAX_STALE_DAYS days after the current one;
      False otherwise (including when either date is missing).
    """
    if curr_release_date and latest_release_date:
        gap = latest_release_date - curr_release_date
        return gap.days >= ReportGeneratorConfig.MAX_STALE_DAYS
    # At least one date is unknown, so staleness cannot be judged.
    return False
def generate_report(sdk_type):
    """
    Write SDK dependency check results into a html report.

    The section is appended to ReportGeneratorConfig.FINAL_REPORT (the file is
    created when missing). When the raw report for `sdk_type` does not exist,
    a short notice is written instead and the function returns early. Any
    error raised while building the section is logged and written into the
    report as a `<p>` element rather than propagated.

    Args:
      sdk_type: String [Java, Python, TODO: Go]
    Raises:
      IOError/OSError: when the final report file cannot be opened.
    """
    report_name = ReportGeneratorConfig.FINAL_REPORT
    raw_report = ReportGeneratorConfig.get_raw_report(sdk_type)

    # 'a' appends to an existing report and creates a missing one, so no
    # separate existence check is needed. The `with` block guarantees the
    # handle is closed on every exit path, including the early return.
    with open(report_name, 'a') as report:
        try:
            # Extract dependency check results from build/dependencyUpdate
            if not os.path.isfile(raw_report):
                report.write("Did not find the raw report of dependency check: {}".format(raw_report))
                return
            outdated_deps = extract_results(raw_report)

            # Prioritize dependencies by comparing versions and release dates.
            high_priority_deps = prioritize_dependencies(outdated_deps, sdk_type)

            # Write results to a report
            subtitle = "<h2>High Priority Dependency Updates Of Beam {} SDK:</h2>\n".format(sdk_type)
            table_fields = """<tr>
<td><b>{0}</b></td>
<td><b>{1}</b></td>
<td><b>{2}</b></td>
<td><b>{3}</b></td>
<td><b>{4}</b></td>
<td><b>{5}</b></td>
</tr>""".format("Dependency Name",
                "Current Version",
                "Latest Version",
                "Release Date Of the Current Used Version",
                "Release Date Of The Latest Release",
                "JIRA Issue")
            report.write(subtitle)
            report.write("<table>\n")
            report.write(table_fields)
            for dep in high_priority_deps:
                report.write("%s" % dep)
            report.write("</table>\n")
        except Exception as e:
            # Surface the failure inside the report itself so readers see it.
            traceback.print_exc()
            logging.error("Failed generate the dependency report. " + str(e))
            report.write('<p> {0} </p>'.format(str(e)))

    logging.info("Dependency check on {0} SDK complete. The report is created.".format(sdk_type))
def main(args):
    """
    Main method: generate the dependency report for a single SDK.

    Args:
      args: argument list; args[0] is the type of the check [Java, Python]
    """
    sdk_type = args[0]
    generate_report(sdk_type)
# Script entry point: pass the command-line arguments (without the program
# name) to main() when this file is executed directly.
if __name__ == '__main__':
    main(sys.argv[1:])