Convert Jira emoji shortcuts to Unicode characters; handle conversion errors caused by newline characters

commit: d3a9cf87c8fa5fadb6f333916cdad6f4793c9f91 [log] [tgz]
author: Tomoko Uchida <tomoko.uchida.1111@gmail.com> Fri Jul 01 00:46:29 2022 +0900
committer: Tomoko Uchida <tomoko.uchida.1111@gmail.com> Fri Jul 01 00:46:29 2022 +0900
tree: 15fb5f0c91546016a9e3f59aea7f22fb7975acde
parent: 0c6cf7ebf3dddd188250199a36bfb1ac63cbf6cd [diff]
diff --git a/migration/src/jira_util.py b/migration/src/jira_util.py
index 16c91ea..4fc5d17 100644
--- a/migration/src/jira_util.py
+++ b/migration/src/jira_util.py

@@ -154,6 +154,27 @@
     return res
 
 
+JIRA_EMOJI_TO_UNICODE = {
+    "(y)": "\U0001F44D",
+    "(n)": "\U0001F44E",
+    "(i)": "\U0001F6C8",
+    "(/)": "\u2714",
+    "(x)": "\u274C",
+    "(!)": "\u26A0",
+    "(+)": "\u002B",
+    "(-)": "\u2212",
+    "(?)": "\u003F",
+    "(on)": "\U0001F4A1",
+    "(off)": "\U0001F4A1",
+    "(*)": "\u2B50",
+    "(*r)": "\u2B50",
+    "(*g)": "\u2B50",
+    "(*b)": "\u2B50",
+    "(flag)": "\U0001F3F4",
+    "(flagoff)": "\U0001F3F3"
+}
+
+REGEX_CRLF = re.compile(r"\r\n\s*")
 REGEX_JIRA_KEY = re.compile(r"[^/]LUCENE-\d+")
 REGEX_MENTION = re.compile(r"@\w+")
 REGEX_LINK = re.compile(r"\[([^\]]+)\]\(([^\)]+)\)")
@@ -169,7 +190,9 @@
                 res = f"[{m.group(1)}]({repl})"
         return res
 
-    text = text.replace("\r\n", "\n")
+    text = re.sub(REGEX_CRLF, "\n", text)
+    for emoji, unicode in JIRA_EMOJI_TO_UNICODE.items():
+        text = text.replace(emoji, unicode)
     text = jira2markdown.convert(text)
 
     # markup @ mentions with ``
commit	d3a9cf87c8fa5fadb6f333916cdad6f4793c9f91	[log] [tgz]
author	Tomoko Uchida <tomoko.uchida.1111@gmail.com>	Fri Jul 01 00:46:29 2022 +0900
committer	Tomoko Uchida <tomoko.uchida.1111@gmail.com>	Fri Jul 01 00:46:29 2022 +0900
tree	15fb5f0c91546016a9e3f59aea7f22fb7975acde
parent	0c6cf7ebf3dddd188250199a36bfb1ac63cbf6cd [diff]