#!/usr/bin/env python

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Utility for creating well-formed pull request merges and pushing
# them to Apache. This script is a modified version of the one created
# by the Spark project
# (https://github.com/apache/spark/blob/master/dev/merge_spark_pr.py).
#
# Usage: ./dl-merge-pr.py (see config env vars below)
#
# This utility assumes you already have a local distributedlog git folder and that you
# have added remotes corresponding to both:
# (i) the github apache distributedlog mirror and
# (ii) the apache distributedlog git repo.

import json
import os
import re
import subprocess
import sys
import urllib2

# The jira client is an optional dependency; JIRA_IMPORTED records whether
# it could be loaded so that main() can skip the JIRA step when it is missing.
try:
  import jira.client
except ImportError:
  JIRA_IMPORTED = False
else:
  JIRA_IMPORTED = True

# Project name as used in the GitHub and Apache git repository URLs
PROJECT_NAME = 'distributedlog'

# Short upper-case project key: prefix of the *_HOME environment variable
# and of the JIRA issue ids (e.g. DL-877)
CAPITALIZED_PROJECT_NAME = 'DL'

# Location of the local git repository (defaults to the current directory)
REPO_HOME = os.environ.get('{0}_HOME'.format(CAPITALIZED_PROJECT_NAME), os.getcwd())

# Name of the git remote that points to the GitHub site
PR_REMOTE_NAME = os.environ.get('PR_REMOTE_NAME', 'apache-github')

# Name of the git remote that points to Apache git
PUSH_REMOTE_NAME = os.environ.get('PUSH_REMOTE_NAME', 'apache')

# ASF JIRA credentials; both default to the empty string when unset
JIRA_USERNAME = os.environ.get('JIRA_USERNAME', '')
JIRA_PASSWORD = os.environ.get('JIRA_PASSWORD', '')

# OAuth key used for issuing requests against the GitHub API. When it
# is not defined, requests are unauthenticated. You should only need
# to configure this if you regularly exceed your IP's unauthenticated
# request rate limit. An OAuth key can be created at
# https://github.com/settings/tokens. This script only requires the
# "public_repo" scope.
GITHUB_OAUTH_KEY = os.environ.get('GITHUB_OAUTH_KEY')

# GitHub account that owns the repository, and the URLs derived from it
GITHUB_USER = os.environ.get('GITHUB_USER', 'apache')
GITHUB_BASE = 'https://github.com/{0}/{1}/pull'.format(GITHUB_USER, PROJECT_NAME)
GITHUB_PR_REMOTE = 'https://github.com/{0}/{1}.git'.format(GITHUB_USER, PROJECT_NAME)
GITHUB_API_URL = 'https://api.github.com'
GITHUB_API_BASE = '{0}/repos/{1}/{2}'.format(GITHUB_API_URL, GITHUB_USER, PROJECT_NAME)

# JIRA_BASE is used to build issue links, JIRA_API_BASE is the server
# address handed to the jira client
JIRA_BASE = 'https://issues.apache.org/jira/browse'
JIRA_API_BASE = 'https://issues.apache.org/jira'

# URL used when the push remote has to be created by this script
PUSH_PR_REMOTE = 'https://gitbox.apache.org/repos/asf/{0}.git'.format(PROJECT_NAME)

# Prefix added to temporary branches
TEMP_BRANCH_PREFIX = 'PR_TOOL'
# Prefix a remote branch name must have to be considered in main();
# the empty string matches every branch
RELEASE_BRANCH_PREFIX = ''

# Development branch and the fix version proposed for merges into it
DEV_BRANCH_NAME = 'master'
DEFAULT_FIX_VERSION = os.environ.get('DEFAULT_FIX_VERSION', '0.5.0')

def get_json(url, preview_api=False):
  """
  Returns parsed JSON from an API call to the GitHub API.

  url: full URL of the GitHub API endpoint to request.
  preview_api: when true, the 'black-cat-preview' media type is sent in
    the Accept header (get_reviewers passes it for the reviews endpoint).

  The request is authenticated with GITHUB_OAUTH_KEY when that is set.
  On any HTTP error a diagnostic is printed and the script exits with
  status -1; the function never returns None to its callers.
  """
  request = urllib2.Request(url)
  if GITHUB_OAUTH_KEY:
    request.add_header('Authorization', 'token {0}'.format(GITHUB_OAUTH_KEY))
  if preview_api:
    request.add_header('Accept', 'application/vnd.github.black-cat-preview+json')

  try:
    response = urllib2.urlopen(request)
    try:
      return json.load(response)
    finally:
      # Release the connection even when the payload is not valid JSON
      response.close()
  except urllib2.HTTPError as e:
    if 'X-RateLimit-Remaining' in e.headers and e.headers['X-RateLimit-Remaining'] == '0':
      # Built as one string: a multi-argument print(...) would print a
      # tuple under Python 2
      print('Exceeded the GitHub API rate limit; see the instructions in ' +
            'dl-merge-pr.py to configure an OAuth token for making authenticated ' +
            'GitHub requests.')
    else:
      print('Unable to fetch URL, exiting: {0}'.format(url))
    # Exit in both cases: returning None here would only make the caller
    # crash later on with an unrelated error
    sys.exit(-1)


def fail(msg):
  """
  Reports msg, restores the repository through clean_up() and then
  terminates the script with exit status -1.
  """
  print(msg)
  clean_up()
  # Same effect as sys.exit(-1), which works by raising SystemExit
  raise SystemExit(-1)


def run_cmd(cmd, verbose=False):
  """
  Executes cmd (an argument list) and returns what it wrote to stdout.

  The command is echoed first only when verbose is exactly True. A
  non-zero exit status surfaces as the exception raised by
  subprocess.check_output.
  """
  if verbose is True:
    print(cmd)
  output = subprocess.check_output(cmd)
  return output


def continue_maybe(prompt):
  """
  Shows prompt as a y/n question and returns only when the answer is
  'y' (case-insensitive); any other answer ends the script via fail().
  """
  answer = raw_input('\n{0} (y/n): '.format(prompt))
  if answer.lower() == 'y':
    return
  fail('Okay, exiting')


def clean_up():
  """
  Puts the repository back into its original state: checks out the
  branch recorded in original_head if HEAD has moved, then deletes
  every local branch whose name starts with TEMP_BRANCH_PREFIX.
  """
  if original_head != get_current_branch():
    print('Restoring head pointer to {0}'.format(original_head))
    run_cmd(['git', 'checkout', original_head])

  # Dropping the spaces removes the indentation 'git branch' puts in
  # front of each branch name
  branch_listing = run_cmd(['git', 'branch'])
  for name in branch_listing.replace(" ", "").split('\n'):
    if not name.startswith(TEMP_BRANCH_PREFIX):
      continue
    print('Deleting local branch {0}'.format(name))
    run_cmd(['git', 'branch', '-D', name])


def get_current_branch():
  """
  Returns the abbreviated ref name of HEAD as reported by
  'git rev-parse --abbrev-ref', without trailing whitespace.
  """
  head_ref = run_cmd(['git', 'rev-parse', '--abbrev-ref', 'HEAD'])
  return head_ref.rstrip()


def merge_pr(pr_num, target_ref, title, body, default_pr_reviewers, pr_repo_desc):
  """
  Squash-merges the requested PR onto the target branch, pushes the
  result and returns a (merge hash, merge commit log) tuple.

  pr_num: number of the pull request to merge.
  target_ref: name of the branch the PR is merged into.
  title: first paragraph of the squashed commit message.
  body: PR description added to the commit message; may be None.
  default_pr_reviewers: reviewers proposed when prompting for reviewers.
  pr_repo_desc: description of the PR source, used in the 'Closes' line.

  The user is prompted several times along the way; declining a
  confirmation or a failing push ends the script through fail().
  """
  pr_branch_name = '{0}_MERGE_PR_{1}'.format(TEMP_BRANCH_PREFIX, pr_num)
  target_branch_name = '{0}_MERGE_PR_{1}_{2}'.format(TEMP_BRANCH_PREFIX, pr_num, target_ref.upper())
  run_cmd(['git', 'fetch', PR_REMOTE_NAME, 'pull/{0}/head:{1}'.format(pr_num, pr_branch_name)])
  # Fetch the target branch from the very remote the merge is pushed to
  # below (as cherry_pick does), so both operations see the same history
  run_cmd(['git', 'fetch', PUSH_REMOTE_NAME, '{0}:{1}'.format(target_ref, target_branch_name)])
  run_cmd(['git', 'checkout', target_branch_name])

  had_conflicts = False
  try:
    run_cmd(['git', 'merge', pr_branch_name, '--squash'])
  except Exception as e:
    msg = 'Error merging: {0}\nWould you like to manually fix-up this merge?'.format(e)
    continue_maybe(msg)
    msg = 'Okay, please fix any conflicts and \'git add\' conflicting files... Finished?'
    continue_maybe(msg)
    had_conflicts = True

  # Offer to run unit tests before committing
  result = raw_input('Do you want to validate unit tests after the merge? (y/n): ')
  if result.lower() == 'y':
    test_res = subprocess.call('mvn clean install'.split())
    if test_res == 0:
      print('Unit tests execution succeeded')
    else:
      continue_maybe('Unit tests execution FAILED. Do you want to continue with the merge anyway?')

  # Authors of the PR commits, most frequent committer first
  commit_authors = run_cmd(['git', 'log', 'HEAD..{0}'.format(pr_branch_name), '--pretty=format:%an <%ae>']).split("\n")
  distinct_authors = sorted(set(commit_authors), key=lambda x: commit_authors.count(x), reverse=True)
  primary_author = raw_input('Enter primary author in the format of \'name <email>\' [{0}]: '.format(distinct_authors[0]))
  if primary_author == '':
    primary_author = distinct_authors[0]

  reviewers = raw_input('Enter reviewers [{0}]: '.format(default_pr_reviewers)).strip()
  if reviewers == '':
    reviewers = default_pr_reviewers

  commits = run_cmd(['git', 'log', 'HEAD..{0}'.format(pr_branch_name), '--pretty=format:%h [%an] %s']).split('\n')

  # Listing the individual commits is only offered when there are several
  should_list_commits = False
  if len(commits) > 1:
    result = raw_input('List pull request commits in squashed commit message? (y/n): ')
    should_list_commits = result.lower() == 'y'

  # Every '-m' pair becomes a separate paragraph of the commit message
  merge_message_flags = ['-m', title]
  if body is not None:
    # We remove @ symbols from the body to avoid triggering e-mails
    # to people every time someone creates a public fork of the project.
    merge_message_flags += ['-m', body.replace('@', '')]

  authors = '\n'.join(['Author: {0}'.format(a) for a in distinct_authors])
  merge_message_flags += ['-m', authors]

  if reviewers != '':
    merge_message_flags += ['-m', 'Reviewers: {0}'.format(reviewers)]

  if had_conflicts:
    committer_name = run_cmd(['git', 'config', '--get', 'user.name']).strip()
    committer_email = run_cmd(['git', 'config', '--get', 'user.email']).strip()
    message = 'This patch had conflicts when merged, resolved by\nCommitter: {0} <{1}>'.format(committer_name, committer_email)
    merge_message_flags += ['-m', message]

  # The "Closes #%s" string is required for GitHub to correctly close the PR
  close_line = 'Closes #{0} from {1}'.format(pr_num, pr_repo_desc)
  if should_list_commits:
    close_line += ' and squashes the following commits:'

  # Append 'Closes #%s' commit message to close the PR
  merge_message_flags += ['-m', close_line]

  if should_list_commits:
    merge_message_flags += ['-m', '\n'.join(commits)]

  run_cmd(['git', 'commit', '--author="%s"' % primary_author] + merge_message_flags)

  continue_maybe('Merge complete (local ref {0}). Push to {1}?'.format(target_branch_name, PUSH_REMOTE_NAME))

  try:
    run_cmd(['git', 'push', PUSH_REMOTE_NAME, '{0}:{1}'.format(target_branch_name, target_ref)])
  except Exception as e:
    # fail() reports the error first and then cleans up itself, so the
    # push error is shown even if restoring the repository goes wrong
    fail('Exception while pushing: {0}'.format(e))

  merge_hash = run_cmd(['git', 'rev-parse', target_branch_name])[:8]
  merge_log = run_cmd(['git', 'show', '--format=fuller', '-q', target_branch_name])
  clean_up()
  print('Pull request #{0} merged!'.format(pr_num))
  print('Merge hash: {0}'.format(merge_hash))
  return merge_hash, merge_log


def cherry_pick(pr_num, merge_hash, default_branch):
  """
  Cherry-picks an already merged commit into another branch and pushes it.

  pr_num: number of the pull request the commit belongs to.
  merge_hash: hash of the commit to cherry-pick.
  default_branch: branch proposed when prompting for the target branch.

  Returns the name of the branch the commit was picked into. Declining
  a confirmation or a failing push ends the script through fail().
  """
  pick_ref = raw_input('Enter a branch name [{0}]: '.format(default_branch))
  if pick_ref == '':
    pick_ref = default_branch

  pick_branch_name = '{0}_PICK_PR_{1}_{2}'.format(TEMP_BRANCH_PREFIX, pr_num, pick_ref.upper())

  run_cmd(['git', 'fetch', PUSH_REMOTE_NAME, '{0}:{1}'.format(pick_ref, pick_branch_name)])
  run_cmd(['git', 'checkout', pick_branch_name])

  try:
    run_cmd(['git', 'cherry-pick', '-sx', merge_hash])
  except Exception as e:
    msg = 'Error cherry-picking: {0}\nWould you like to manually fix-up this merge?'.format(e)
    continue_maybe(msg)
    msg = 'Okay, please fix any conflicts and finish the cherry-pick. Finished?'
    continue_maybe(msg)

  continue_maybe('Pick complete (local ref {0}). Push to {1}?'.format(pick_branch_name, PUSH_REMOTE_NAME))

  try:
    run_cmd(['git', 'push', PUSH_REMOTE_NAME, '{0}:{1}'.format(pick_branch_name, pick_ref)])
  except Exception as e:
    # fail() reports the error first and then cleans up itself, so the
    # push error is shown even if restoring the repository goes wrong
    fail('Exception while pushing: {0}'.format(e))

  pick_hash = run_cmd(['git', 'rev-parse', pick_branch_name])[:8]
  clean_up()

  print('Pull request #{0} picked into {1}!'.format(pr_num, pick_ref))
  print('Pick hash: {0}'.format(pick_hash))
  return pick_ref


def fix_version_from_branch(branch, versions):
  """
  Picks the default JIRA fix version for a merge into branch.

  branch: name of the branch that was merged into.
  versions: iterable of version names.

  For DEV_BRANCH_NAME the result is DEFAULT_FIX_VERSION when it is among
  versions. For any other branch it is the last version whose name
  starts with the branch name. Returns None when nothing matches.
  """
  # Note: Assumes this is a sorted (newest->oldest) list of un-released versions
  # List comprehensions (rather than filter) keep len()/indexing valid
  # on Python 3 as well and accept any iterable of names.
  if branch == DEV_BRANCH_NAME:
    matching = [v for v in versions if v == DEFAULT_FIX_VERSION]
    return matching[0] if matching else None

  matching = [v for v in versions if v.startswith(branch)]
  return matching[-1] if matching else None


def resolve_jira_issue(merge_branches, comment, jira_id):
  """
  Resolves the JIRA issue associated with the pull request.

  merge_branches: branches the change was merged into; they determine
    the fix versions proposed to the user.
  comment: comment added to the issue together with the transition.
  jira_id: id of the issue to resolve.

  Returns without doing anything when the user declines. Ends the
  script through fail() when the issue cannot be loaded, is already
  resolved or closed, or when an entered fix version is not one of the
  project's unreleased versions.
  """
  asf_jira = jira.client.JIRA({'server': JIRA_API_BASE},
                              basic_auth=(JIRA_USERNAME, JIRA_PASSWORD))

  result = raw_input('Resolve JIRA {0} ? (y/n): '.format(jira_id))
  if result.lower() != 'y':
    return

  try:
    issue = asf_jira.issue(jira_id)
  except Exception as e:
    fail('ASF JIRA could not find {0}\n{1}'.format(jira_id, e))

  cur_status = issue.fields.status.name
  cur_summary = issue.fields.summary
  cur_assignee = issue.fields.assignee
  if cur_assignee is None:
    cur_assignee = 'NOT ASSIGNED!!!'
  else:
    cur_assignee = cur_assignee.displayName

  if cur_status == 'Resolved' or cur_status == 'Closed':
    fail('JIRA issue {0} already has status \'{1}\''.format(jira_id, cur_status))

  print('=== JIRA {0} ==='.format(jira_id))
  print('summary\t\t{0}\nassignee\t{1}\nstatus\t\t{2}\nurl\t\t{3}/{4}\n'.format(
    cur_summary, cur_assignee, cur_status, JIRA_BASE, jira_id))

  # Unreleased versions of the project, newest name first
  versions = asf_jira.project_versions(CAPITALIZED_PROJECT_NAME)
  versions = sorted(versions, key=lambda x: x.name, reverse=True)
  versions = [v for v in versions if v.raw['released'] is False]

  version_names = [v.name for v in versions]
  default_fix_versions = [fix_version_from_branch(b, version_names) for b in merge_branches]
  default_fix_versions = ','.join([v for v in default_fix_versions if v is not None])

  fix_versions = raw_input('Enter comma-separated fix version(s) [{0}]: '.format(default_fix_versions))
  if fix_versions == '':
    fix_versions = default_fix_versions
  # Split typed input as well as the default; empty entries (no default
  # available, stray commas) are dropped instead of being looked up
  fix_versions = [v for v in fix_versions.replace(' ', '').split(',') if v]

  def get_version_json(version_str):
    # Raw JSON of the first unreleased version carrying this name
    matches = [v for v in versions if v.name == version_str]
    if not matches:
      fail('Fix version \'{0}\' is not an unreleased version of {1}'.format(
        version_str, CAPITALIZED_PROJECT_NAME))
    return matches[0].raw

  jira_fix_versions = [get_version_json(v) for v in fix_versions]

  resolve = [t for t in asf_jira.transitions(jira_id) if t['name'] == 'Resolve Issue'][0]
  resolution = [r for r in asf_jira.resolutions() if r.raw['name'] == 'Fixed'][0]
  asf_jira.transition_issue(
    jira_id, resolve['id'], fixVersions = jira_fix_versions,
    comment = comment, resolution = {'id': resolution.raw['id']})

  print('Successfully resolved {0} with fixVersions={1}!'.format(jira_id, fix_versions))


def resolve_jira_issues(title, merge_branches, comment):
  """
  Resolves every JIRA issue whose id (CAPITALIZED_PROJECT_NAME followed
  by a dash and digits) appears in title, in order of appearance.
  """
  issue_ids = re.findall('%s-[0-9]+' % CAPITALIZED_PROJECT_NAME, title)

  if not issue_ids:
    print('No JIRA issue found to update')
  for issue_id in issue_ids:
    resolve_jira_issue(merge_branches, comment, issue_id)


def standardize_jira_ref(text):
  """
  Rewrites a commit title into the form "PROJECT_NAME-XXX: [COMPONENT] Issue":
  JIRA reference(s) first, then bracketed component(s), then the rest.

  >>> standardize_jira_ref("%s-877: Script for generating patch for reviews" % CAPITALIZED_PROJECT_NAME)
  'DL-877: Script for generating patch for reviews'
  """
  whitespace = re.compile(r'\s+')

  # Pull out the JIRA ref(s): upper-case them and turn inner whitespace
  # into a dash, then drop the original spelling from the text
  ref_pattern = re.compile(r'(%s[-\s]*[0-9]+)+' % CAPITALIZED_PROJECT_NAME, re.IGNORECASE)
  jira_refs = []
  for raw_ref in ref_pattern.findall(text):
    jira_refs.append(whitespace.sub('-', raw_ref.upper()))
    text = text.replace(raw_ref, '')

  # Pull out the bracketed component(s): word characters, whitespace,
  # dashes, periods and/or commas between square brackets
  component_pattern = re.compile(r'(\[[\w\s,-\.]+\])', re.IGNORECASE)
  components = []
  for raw_component in component_pattern.findall(text):
    components.append(raw_component.upper())
    text = text.replace(raw_component, '')

  # Strip whatever symbols are left at the start of the text
  leading_symbols = re.compile(r'^\W+(.*)', re.IGNORECASE).search(text)
  if leading_symbols is not None:
    text = leading_symbols.group(1)

  # Put the pieces together: JIRA ref(s), component(s), remaining text
  jira_prefix = ' '.join(jira_refs).strip()
  if jira_prefix:
    jira_prefix += ": "
  assembled = jira_prefix + ' '.join(components).strip() + " " + text.strip()

  # Collapse runs of whitespace, e.g. when there were no refs or components
  return whitespace.sub(' ', assembled.strip())


def get_reviewers(pr_num):
  """
  Gets a candidate list of reviewers that have commented on the PR with
  '+1' or 'LGTM', or that submitted a review in the 'approved' state.

  pr_num: number of the pull request.

  Returns the reviewers as one ', '-separated string, ordered by GitHub
  login. Reviewers listed in the 'reviewers' JSON file next to this
  script use the name and email from that file; the others are looked
  up through the GitHub API and appear as 'name <email>', or as the
  name alone when no email is published. Ends the script through
  fail() when the pull request has no approval at all.
  """
  reviewers_ids = set()

  approval_msgs = ['+1', 'lgtm']
  pr_comments = get_json('{0}/issues/{1}/comments'.format(GITHUB_API_BASE, pr_num))
  for comment in pr_comments:
    comment_body = comment['body'].lower()
    if any(approval_msg in comment_body for approval_msg in approval_msgs):
      reviewers_ids.add(comment['user']['login'])

  approval_review_states = ['approved']
  pr_reviews = get_json('{0}/pulls/{1}/reviews'.format(GITHUB_API_BASE, pr_num), True)
  for review in pr_reviews:
    review_state = review['state'].lower()
    if any(approval_state in review_state for approval_state in approval_review_states):
      reviewers_ids.add(review['user']['login'])

  if len(reviewers_ids) == 0:
    fail("No approvals found in this pull request")

  dir_path = os.path.dirname(os.path.realpath(__file__))
  with open('{0}/reviewers'.format(dir_path)) as reviewers_data:
    reviewers = json.load(reviewers_data)

  reviewers_emails = []
  # Sorted so that the proposed reviewers line is the same on every run
  for reviewer_id in sorted(reviewers_ids):
    if reviewer_id in reviewers:
      reviewer = reviewers[reviewer_id]
      reviewers_emails.append('{0} <{1}>'.format(reviewer['name'], reviewer['email']))
      continue

    user = get_json('{0}/users/{1}'.format(GITHUB_API_URL, reviewer_id))
    useremail = user['email'].strip() if user['email'] is not None else None
    # Fall back to the email when the profile has no display name
    username = user['name'].strip() if user['name'] is not None else useremail
    if username is None:
      # Neither name nor email is available: nothing useful to list
      continue
    if useremail is None:
      # No published email: list the name alone rather than 'name <None>'
      reviewers_emails.append(username)
    else:
      reviewers_emails.append('{0} <{1}>'.format(username, useremail))
  return ', '.join(reviewers_emails)


def check_remote_repos():
  """
  Makes sure both git remotes this script relies on are configured,
  offering to add each one that 'git remote show' does not list.
  """
  configured = run_cmd(['git', 'remote', 'show']).rstrip().split('\n')
  required = (
    (PR_REMOTE_NAME, GITHUB_PR_REMOTE),
    (PUSH_REMOTE_NAME, PUSH_PR_REMOTE),
  )
  for remote_name, remote_url in required:
    if remote_name not in configured:
      _add_remote_repos(remote_name, remote_url)


def _add_remote_repos(remote_name, remote_url):
  """
  Asks if we want to configure the remote repository.

  remote_name: name of the missing git remote.
  remote_url: URL the remote is created with when the user agrees.

  Exits with status 1 when the user declines, since the script cannot
  work without the remote.
  """
  result = raw_input('You don\'t have {0} set as a remote, do you want me to set this up for you? [y/n]: '.format(remote_name))
  if result.lower() != 'y':
    # Name the remote that is actually missing, which is not always
    # PR_REMOTE_NAME
    print('You need to set {0} before continuing'.format(remote_name))
    sys.exit(1)
  run_cmd(['git', 'remote', 'add', remote_name, remote_url])


def main():
  """
  Interactive entry point of the merge tool.

  Asks for a pull request number and then either
  - backports it, when the PR was already closed by 'asfgit': the merge
    commit is cherry-picked into a branch chosen by the user, or
  - merges it: squash-merge and push, optional cherry-picks into other
    branches and, when the jira library and credentials are available,
    resolution of the JIRA issues named in the commit title.

  Records the branch checked out at start in the global original_head
  so that clean_up() can restore it.
  """
  global original_head

  # we need the remote repository to be set
  check_remote_repos()

  original_head = get_current_branch()

  branches = get_json('{0}/branches'.format(GITHUB_API_BASE))
  branch_names = [x['name'] for x in branches if x['name'].startswith(RELEASE_BRANCH_PREFIX)]

  if not branch_names:
    print('No remote branch.')
    sys.exit(0)

  # Assumes branch names can be sorted lexicographically
  latest_branch = sorted(branch_names, reverse=True)[0]

  pr_num = raw_input('Which pull request would you like to merge? (e.g. 34): ')
  pr = get_json('{0}/pulls/{1}'.format(GITHUB_API_BASE, pr_num))
  pr_events = get_json('{0}/issues/{1}/events'.format(GITHUB_API_BASE, pr_num))
  pr_reviewers = get_reviewers(pr_num)

  url = pr['url']

  pr_title = pr['title']
  # The prompt is handed to raw_input as a UTF-8 byte string. Decoding
  # the prompt (as opposed to leaving it encoded) makes raw_input raise
  # on a non-ASCII title.
  commit_title = raw_input('Commit title [{0}]: '.format(pr_title.encode('utf-8')))
  if commit_title == '':
    commit_title = pr_title

  # Decide whether to use the modified title or not
  modified_title = standardize_jira_ref(commit_title)
  if modified_title != commit_title:
    print('I\'ve re-written the title as follows to match the standard format:')
    print('Original: {0}'.format(commit_title))
    print('Modified: {0}'.format(modified_title))
    result = raw_input('Would you like to use the modified title? (y/n): ')
    if result.lower() == 'y':
      commit_title = modified_title
      print('Using modified title:')
    else:
      print('Using original title:')
    # Show the title that was chosen, whichever branch was taken
    print(commit_title)

  body = pr['body']
  target_ref = pr['base']['ref']
  user_login = pr['user']['login']
  base_ref = pr['head']['ref']
  pr_repo_desc = '{0}/{1}'.format(user_login, base_ref)

  # Merged pull requests don't appear as merged in the GitHub API;
  # Instead, they're closed by asfgit.
  merge_commits = [
    e for e in pr_events
    if e['event'] == 'closed' and e['actor'] is not None and e['actor']['login'] == 'asfgit'
  ]

  if merge_commits:
    merge_hash = merge_commits[0]['commit_id']
    message = get_json('{0}/commits/{1}'.format(GITHUB_API_BASE, merge_hash))['commit']['message']

    print('Pull request {0} has already been merged, assuming you want to backport'.format(pr_num))
    try:
      commit_is_downloaded = run_cmd(
        ['git', 'rev-parse', '--quiet', '--verify', '%s^{commit}' % merge_hash]
      ).strip() != ''
    except subprocess.CalledProcessError:
      # rev-parse --verify exits non-zero for an unknown commit, which
      # run_cmd reports as an exception rather than as empty output
      commit_is_downloaded = False
    if not commit_is_downloaded:
      fail('Could not find any merge commit for #{0}, you may need to update HEAD.'.format(pr_num))

    print('Found commit %s:\n%s' % (merge_hash, message))
    cherry_pick(pr_num, merge_hash, latest_branch)
    sys.exit(0)

  if not bool(pr['mergeable']):
    msg = 'Pull request {0} is not mergeable in its current form.\n'.format(pr_num) + \
          'Continue? (experts only!)'
    continue_maybe(msg)

  print('\n=== Pull Request #{0} ==='.format(pr_num))
  print('PR title\t{0}\nCommit title\t{1}\nSource\t\t{2}\nTarget\t\t{3}\nURL\t\t{4}'.format(
    pr_title, commit_title, pr_repo_desc, target_ref, url
  ))
  continue_maybe('Proceed with merging pull request #{0}?'.format(pr_num))

  merged_refs = [target_ref]

  merge_hash, merge_commit_log = merge_pr(pr_num, target_ref, commit_title, body, pr_reviewers, pr_repo_desc)

  # Keep offering cherry-picks until the user answers something other than 'y'
  pick_prompt = 'Would you like to pick %s into another branch?' % merge_hash
  while raw_input('\n%s (y/n): ' % pick_prompt).lower() == 'y':
    merged_refs = merged_refs + [cherry_pick(pr_num, merge_hash, latest_branch)]

  if JIRA_IMPORTED:
    if JIRA_USERNAME and JIRA_PASSWORD:
      jira_comment = '''Issue resolved by merging pull request %s
      [%s/%s]

      {noformat}
      %s
      {noformat}
      ''' % (pr_num, GITHUB_BASE, pr_num, merge_commit_log)
      resolve_jira_issues(commit_title, merged_refs, jira_comment)
    else:
      print('JIRA_USERNAME and JIRA_PASSWORD are not set')
      print('Exiting without trying to close the associated JIRA.')
  else:
    print('Could not find jira-python library. Run \'sudo pip install jira\' to install.')
    print('Exiting without trying to close the associated JIRA.')


if __name__ == "__main__":
  import doctest
  # Run the doctests embedded in this file first and refuse to start
  # the merge tool when any of them fails
  (failure_count, test_count) = doctest.testmod()
  if failure_count:
    # sys.exit rather than the 'exit' helper, which the site module only
    # provides for interactive use
    sys.exit(-1)

  main()
