Merge pull request #1 from chrisdutz/feature/customized-subject-lines

feature: Added initial support for custom email subjects for GitHub d…
diff --git a/README.md b/README.md
index 7578030..476136b 100644
--- a/README.md
+++ b/README.md
@@ -11,5 +11,48 @@
 notifications:
   commits: commits@foo.apache.org
   discussions: issues@foo.apache.org
+
+custom_subjects:
+    new_discussion: "Created: Discussion {repository}: {title}"
+    edit_discussion: "Edited: Discussion {repository}: {title}"
+    close_discussion: "Closed: Discussion {repository}: {title}"
+    close_discussion_with_comment: "Closed: Discussion with comment {repository}: {title}"
+    reopen_discussion: "Reopened: Discussion {repository}: {title}"
+    new_comment_discussion: "Commented: Discussion {repository}: {title}"
+    edit_comment_discussion: "Edited a comment: Discussion {repository}: {title}"
+    delete_comment_discussion: "Deleted a comment: Discussion {repository}: {title}"
   ~~~
+
+## Possible Actions
+
+The different actions I identified and how to detect them:
+
+- Discussion Created:
+  - Action: "created"
+  - No "comment" element
+- Discussion Edited:
+  - Action: "edited"
+  - No "comment" element
+- Discussion Closed without comment:
+  - Action: "closed"
+  - No "comment" element
+- Discussion Closed with comment:
+  - Action: "created"
+  - Existing "comment" element
+  - Discussion/State: "closed"
+- Comment Added:
+  - Action: "created"
+  - Existing "comment" element
+  - Discussion/State: "open"
+- Comment Edited:
+  - Action: "edited"
+  - Existing "comment" element
+  - Discussion/State: "open"
+- Comment Deleted:
+  - Action: "deleted"
+  - Existing "comment" element
+  - Discussion/State: "open"
+
+NOTE: Problem is, that it seems impossible to distinguish between someone adding a comment to a closed discussion and someone closing a discussion with a comment.
+For simplicity reasons, we'll assume that if a comment is added and the discussion state is "closed", that this is someone closing a discussion with a comment.
   
diff --git a/ghd-notifier.py b/ghd-notifier.py
index 1c52533..7583bfb 100644
--- a/ghd-notifier.py
+++ b/ghd-notifier.py
@@ -21,18 +21,36 @@
 import requests
 import logging
 import yaml
+import yaml.parser
 import os
 import uuid
 
 """GitHub Discussions Notifier"""
 
 REPO_ROOT = "/x1/repos/asf"
-VALID_THREAD_ACTIONS = ["created", "edited", "deleted"]
+GHSETTINGS_ROOT = "/x1/asfyaml"
+VALID_THREAD_ACTIONS = ["created", "edited", "closed", "reopened"]
 VALID_COMMENT_ACTIONS = ["created", "edited", "deleted"]
 THREAD_ACTION = open("templates/thread-action.txt").read()
 COMMENT_ACTION = open("templates/comment-action.txt").read()
 
 
+def get_custom_subject(repository, action="catchall"):
+    """Gets a subject template for a specific action, if specified via .asf.yaml"""
+    gh_settings_path = os.path.join(GHSETTINGS_ROOT, f"ghsettings.{repository}.yml")  # Path to github settings yaml file
+    if os.path.isfile(gh_settings_path):
+        try:
+            yml = yaml.safe_load(open(gh_settings_path))
+        except yaml.parser.ParserError:  # Invalid YAML?!
+            return
+        custom_subjects = yml.get("custom_subjects")
+        if custom_subjects and isinstance(custom_subjects, dict):
+            if action in custom_subjects:
+                return custom_subjects[action]
+            elif "catchall_discussions" in custom_subjects:  # If no custom subject exists for this action, but catchall does...
+                return custom_subjects["catchall_discussions"]
+
+
 def get_recipient(repo):
     yaml_path = os.path.join(REPO_ROOT, f"{repo}.git", "notifications.yaml")
     if os.path.exists(yaml_path):
@@ -51,14 +69,44 @@
     category = discussion.get("category").get("slug")
     url = discussion.get("html_url")
     body = discussion.get("body")
-    repo = blob.get("repository").get("name")
+    repository = blob.get("repository").get("name")
     node_id = discussion.get("node_id")
     if action in VALID_THREAD_ACTIONS:
-        recipient = get_recipient(repo)
+        recipient = get_recipient(repository)
         if recipient:
-            unsub = recipient.replace("@", "-unsubscribe@")
+            # The templates contain templates for the subject (first part)
+            # and the content of the email (second part) ... split the template
+            # up.
             subject, text = THREAD_ACTION.split("--", 1)
-            subject = subject.format(**locals()).strip()
+
+            # Define the name of the template for this action.
+            action_name = "new_discussion"
+            if action == "created":
+                action_name = "new_discussion"
+            elif action == "edited":
+                action_name = "edit_discussion"
+            elif action == "closed":
+                action_name = "close_discussion"
+            elif action == "reopened":
+                action_name = "reopen_discussion"
+            # Note: the subjects are checked for validity in
+            # https://github.com/apache/infrastructure-p6/blob/production/modules/gitbox/files/asfgit/package/asfyaml.py
+            # See VALID_GITHUB_SUBJECT_VARIABLES and validate_github_subject()
+            # The variable names listed in VALID_GITHUB_SUBJECT_VARIABLES must be defined
+            # here as local variables
+            custom_subject_line = get_custom_subject(repository, action_name)  # Custom subject line?
+            try:
+                # If a custom subject line was defined, use that ...
+                if custom_subject_line:
+                    subject = custom_subject_line.format(**locals())
+                # Otherwise use the default one, which is located in the title of the template.
+                else:
+                    subject = subject.format(**locals()).strip()
+            except (KeyError, ValueError) as e:  # Template breakage can happen, ignore
+                print(e)
+                return
+
+            unsub = recipient.replace("@", "-unsubscribe@")
             text = text.format(**locals()).strip()
             msg_headers = {}
             msgid = "<ghd-%s-%s@gitbox.apache.org>" % (node_id, str(uuid.uuid4()))
@@ -72,45 +120,81 @@
                     "In-Reply-To": msgid_OP
                 }  # Thread from the actual discussion parent
             asfpy.messaging.mail(
-                sender="GitBox <git@apache.org>", recipient=recipient, subject=subject, message=text, messageid=msgid, headers=msg_headers
+                sender=f"\"{user} (via GitHub)\" <git@apache.org>", recipient=recipient, subject=subject, message=text, messageid=msgid, headers=msg_headers
             )
             return f"[send] {user} {action} {url}: {title}"
     return f"[skip] {user} {action} {url}: {title}"
 
 
+# The general difference between this and the general parse_thread_action
+# is that in this case we're getting the content in the email as well as the
+# user information from the "comment" element instead of the "discussion"
+# element.
 def parse_comment_action(blob):
     """Parses a comment action (comment created/edited/deleted)"""
     action = blob.get("action")
     discussion = blob.get("discussion")
+    discussion_state = discussion.get("state")
     comment = blob.get("comment")
     user = comment.get("user").get("login")
     title = discussion.get("title")
     category = discussion.get("category").get("slug")
     url = comment.get("html_url")
     body = comment.get("body")
-    repo = blob.get("repository").get("name")
+    repository = blob.get("repository").get("name")
     action_human = "???"
     node_id = discussion.get("node_id")
-    if action == "created":
+    # If the user closes a discussion with a comment, there is
+    # currently no way to distinguish this from a user commenting
+    # on a closed issue (if this is even possible). We're assuming
+    # that this doesn't happen and if the discussion state is
+    # "closed" that the user closed with a comment.
+    if action == "created" and discussion_state == "closed":
+        action_human = "closed the discussion with a comment:"
+        action_name = "close_discussion_with_comment"
+    elif action == "created":
         action_human = "added a comment to the discussion:"
+        action_name = "new_comment_discussion"
     elif action == "edited":
         action_human = "edited a comment on the discussion:"
+        action_name = "edit_comment_discussion"
     elif action == "deleted":
         action_human = "deleted a comment on the discussion:"
+        action_name = "delete_comment_discussion"
     if action in VALID_COMMENT_ACTIONS:
-        recipient = get_recipient(repo)
+        recipient = get_recipient(repository)
         if recipient:
+            # The templates contain templates for the subject (first part)
+            # and the content of the email (second part) ... split the template
+            # up.
+            subject, text = COMMENT_ACTION.split("--", 1)
+
+            # Note: the subjects are checked for validity in
+            # https://github.com/apache/infrastructure-p6/blob/production/modules/gitbox/files/asfgit/package/asfyaml.py
+            # See VALID_GITHUB_SUBJECT_VARIABLES and validate_github_subject()
+            # The variable names listed in VALID_GITHUB_SUBJECT_VARIABLES must be defined
+            # here as local variables
+            custom_subject_line = get_custom_subject(repository, action_name)  # Custom subject line?
+            try:
+                # If a custom subject line was defined, use that ...
+                if custom_subject_line:
+                    subject = custom_subject_line.format(**locals())
+                # Otherwise use the default one, which is located in the title of the template.
+                else:
+                    subject = subject.format(**locals()).strip()
+            except (KeyError, ValueError) as e:  # Template breakage can happen, ignore
+                print(e)
+                return
+
             msgid = "<ghd-%s-%s@gitbox.apache.org>" % (node_id, str(uuid.uuid4()))
             msgid_OP = "<ghd-%s@gitbox.apache.org>" % node_id
             unsub = recipient.replace("@", "-unsubscribe@")
-            subject, text = COMMENT_ACTION.split("--", 1)
-            subject = subject.format(**locals()).strip()
             text = text.format(**locals()).strip()
             msg_headers = {
                     "In-Reply-To": msgid_OP
                 }  # Thread from the actual discussion parent
             asfpy.messaging.mail(
-                sender="GitBox <git@apache.org>", recipient=recipient, subject=subject, message=text, messageid=msgid, headers=msg_headers
+                sender=f"\"{user} (via GitHub)\" <git@apache.org>", recipient=recipient, subject=subject, message=text, messageid=msgid, headers=msg_headers
             )
             return f"[send] [comment] {user} {action} {url}: {title}"
     return f"[skip] [comment] {user} {action} {url}: {title}"
@@ -118,24 +202,38 @@
 
 def main():
 
-    # Grab all GitHub WebHook IP ranges
+    # Grab all GitHub WebHook IP ranges and save them, so we can check if an
+    # incoming request is originating from one of these IP addresses.
     webhook_ips = requests.get("https://api.github.com/meta").json()["hooks"]
     allowed_ips = [netaddr.IPNetwork(ip) for ip in webhook_ips]
 
     # Init Flask...
     app = flask.Flask(__name__)
 
+    # This will make Flask react to requests aimed at /hook, which the GitHub
+    # webhook service will be calling.
     @app.route("/hook", methods=["POST", "PUT"])
     def parse_request():
+        # Get the IP address, the request is originating from.
+        # (I assume the "X-Forwarded-For" is used when using tools like ngrok
+        # to forward requests to protected locations, "flask.request.remote_addr"
+        # contains the ip in the direct access case)
         this_ip = netaddr.IPAddress(flask.request.headers.get("X-Forwarded-For") or flask.request.remote_addr)
+
+        # Check if this incoming request is originating from one of the
+        # GitHub webhook IP addresses. Deny the request, if it's not.
         allowed = any(this_ip in ip for ip in allowed_ips)
         if not allowed:
             return "No content\n"
+
+        # Process the incoming message.
         content = flask.request.json
-        act = content.get("action")
+        # GitHub Discussion notifications are all expected to have a "discussion" element.
         if "discussion" in content:
+            # If this is a comment action, it will also contain a "comment" element
             if "comment" in content:
                 logmsg = parse_comment_action(content)
+            # Otherwise it's a basic "create", "edit", "close" operation.
             else:
                 logmsg = parse_thread_action(content)
             log.log(level=logging.WARNING, msg=logmsg)
@@ -145,7 +243,7 @@
     log = logging.getLogger("werkzeug")
     log.setLevel(logging.WARNING)
 
-    # Start up the app
+    # Start up the app (Starts the Flask webserver)
     app.run(host="127.0.0.1", port=8084, debug=False)
 
 
diff --git a/templates/comment-action.txt b/templates/comment-action.txt
index bbfb436..658c15c 100644
--- a/templates/comment-action.txt
+++ b/templates/comment-action.txt
@@ -1,5 +1,4 @@
-
-[GitHub] [{repo}] {user} {action_human} {title}
+[GitHub] [{repository}] {user} {action_human} {title}
 --
 GitHub user {user} {action_human} {title}
 
diff --git a/templates/thread-action.txt b/templates/thread-action.txt
index ee84b1c..aa95a6e 100644
--- a/templates/thread-action.txt
+++ b/templates/thread-action.txt
@@ -1,4 +1,4 @@
-[GitHub] [{repo}] {user} {action} a discussion: {title}
+[GitHub] [{repository}] {user} {action} a discussion: {title}
 --
 GitHub user {user} {action} a discussion: {title}