Create hyperlinks to other ASF projects for PRJ-XXXX style cross-issue links. (#131)

diff --git a/migration/src/jira_util.py b/migration/src/jira_util.py
index 9ee2682..f1bffea 100644
--- a/migration/src/jira_util.py
+++ b/migration/src/jira_util.py
@@ -341,3 +341,699 @@
     text = re.sub(r"\[(LUCENE-\d+)\]\(https?[^\)]+LUCENE-\d+\)", repl_md_link, text)
 
     return text
+
+
+ASF_JIRA_BASE = "https://issues.apache.org/jira/browse/"  # URL prefix; an issue key is appended to it to build the link target
+
+def create_issue_links_outside_projects(text: str) -> str:
+    """Turn bare ``PRJ-1234`` keys of other ASF Jira projects into Markdown links.
+
+    A key is rewritten only when its project prefix is in ``ALL_JIRA_PROJECTS``:
+    ``KEY`` delimited by whitespace/punctuation and ``(KEY)`` become
+    ``[KEY](url)``; ``[KEY]`` not already followed by ``(`` becomes the bare URL.
+    """
+
+    def jira_url(key: str):
+        # None signals "unknown project prefix": the caller keeps the text as is.
+        if key.split("-")[0] not in ALL_JIRA_PROJECTS:
+            return None
+        return ASF_JIRA_BASE + key
+
+    def repl_md_link(m: re.Match) -> str:
+        url = jira_url(m.group(1))
+        return m.group(0) if url is None else f"[{m.group(1)}]({url})"
+
+    def repl_bracket(m: re.Match) -> str:
+        # "[KEY]" is replaced wholesale by the plain URL (brackets are dropped).
+        return jira_url(m.group(1)) or m.group(0)
+
+    key = r"([A-Z0-9]{2,20}-\d+)"
+    # Zero-width delimiters: consuming the separator (as a capture group) would
+    # hide it from the next match, so in "A-1 B-2 " only A-1 got linked; "$" also
+    # covers a key that ends the text.
+    text = re.sub(r"(?<!\S)" + key + r"(?=[\s,;:?!.]|$)", repl_md_link, text)
+    text = re.sub(r"(?<=\()" + key + r"(?=\))", repl_md_link, text)
+    text = re.sub(r"\[" + key + r"\](?!\()", repl_bracket, text)
+
+    return text
+
+
+ALL_JIRA_PROJECTS = set([  # project prefixes that create_issue_links_outside_projects will link; any other prefix is left untouched
+    "AAR",
+    "ABDERA",
+    "ACCUMULO",
+    "ACE",
+    "ACL",
+    "AMQ",
+    "AMQNET",
+    "APLO",
+    "ARTEMIS",
+    "AMQCPP",
+    "AMQCLI",
+    "OPENWIRE",
+    "BLAZE",
+    "ADDR",
+    "AGILA",
+    "AIRAVATA",
+    "ALOIS",
+    "ARMI",
+    "AMATERASU",
+    "AMBARI",
+    "AMBER",
+    "ANAKIA",
+    "AGE2",
+    "AGEOLD",
+    "ANY23",
+    "APEXCORE",
+    "APEXMALHAR",
+    "ARROW",
+    "ASTERIXDB",
+    "AVRO",
+    "AWF",
+    "BLUEMARLIN",
+    "BLUR",
+    "CHAINSAW",
+    "CMDA",
+    "COMMONSSITE",
+    "COMMONSRDF",
+    "TESTING",
+    "CONCERTED",
+    "CORAL",
+    "CB",
+    "CRAIL",
+    "CURATOR",
+    "DATALAB",
+    "DATASKETCHES",
+    "DIRECTMEMORY",
+    "DORIS",
+    "DRILL",
+    "DUBBO",
+    "ECHARTS",
+    "EVENTMESH",
+    "FINERACT",
+    "FLEX",
+    "FREEMARKER",
+    "GEARPUMP",
+    "GOBBLIN",
+    "GORA",
+    "HAWQ",
+    "HELIX",
+    "HOP",
+    "HORN",
+    "HUDI",
+    "INLONG",
+    "IOTDB",
+    "JENA",
+    "KNOX",
+    "LENS",
+    "LIMINAL",
+    "LINKIS",
+    "CLOWNFISH",
+    "MADLIB",
+    "MARVIN",
+    "MASFRES",
+    "METAMODEL",
+    "MXNET",
+    "NEMO",
+    "NETBEANSINFRA",
+    "NIFI",
+    "MINIFI",
+    "MINIFICPP",
+    "NUTTX",
+    "OLTU",
+    "ONAMI",
+    "CLIMATE",
+    "OPENAZ",
+    "HDDS",
+    "PEGASUS",
+    "PETRI",
+    "PINOT",
+    "PLC4X",
+    "QPIDIT",
+    "QUICKSTEP",
+    "RAT",
+    "RIPPLE",
+    "ROCKETMQ",
+    "ROL",
+    "S4",
+    "SDAP",
+    "SCIMPLE",
+    "SEDONA",
+    "SCB",
+    "STORM",
+    "SUBMARINE",
+    "TAVERNA",
+    "TENTACLES",
+    "TEZ",
+    "MTOMCAT",
+    "TRAFODION",
+    "TRAINING",
+    "TWILL",
+    "UNOMI",
+    "WAYANG",
+    "WHIRR",
+    "WHISKER",
+    "YUNIKORN",
+    "ZIPKIN",
+    "APISIX",
+    "MRM",
+    "ARIA",
+    "ARIES",
+    "ASYNCWEB",
+    "ATLAS",
+    "ATTIC",
+    "AURORA",
+    "AVALON",
+    "AVNSHARP",
+    "RUNTIME",
+    "STUDIO",
+    "CENTRAL",
+    "PLANET",
+    "TOOLS",
+    "PNIX",
+    "AXIOM",
+    "AXIS",
+    "AXISCPP",
+    "WSIF",
+    "AXIS2",
+    "TRANSPORTS",
+    "AXIS2C",
+    "BAHIR",
+    "BATCHEE",
+    "BATIK",
+    "BEAM",
+    "BEEHIVE",
+    "BIGTOP",
+    "BLUESKY",
+    "BOOKKEEPER",
+    "TM",
+    "BROOKLYN",
+    "BUILDR",
+    "BVAL",
+    "STDCXX",
+    "CACTUS",
+    "CALCITE",
+    "CAMEL",
+    "CARBONDATA",
+    "CASSANDRA",
+    "CAY",
+    "CELIX",
+    "CS",
+    "CMIS",
+    "CHUKWA",
+    "CLEREZZA",
+    "CLK",
+    "CLKE",
+    "CLOUDSTACK",
+    "COCOON",
+    "COCOON3",
+    "ATTRIBUTES",
+    "BCEL",
+    "BEANUTILS",
+    "BETWIXT",
+    "BSF",
+    "CHAIN",
+    "CLI",
+    "CODEC",
+    "COLLECTIONS",
+    "COMPRESS",
+    "CONFIGURATION",
+    "CRYPTO",
+    "CSV",
+    "DAEMON",
+    "DBCP",
+    "DBUTILS",
+    "DIGESTER",
+    "DISCOVERY",
+    "DORMANT",
+    "EL",
+    "EMAIL",
+    "EXEC",
+    "FEEDPARSER",
+    "FILEUPLOAD",
+    "FUNCTOR",
+    "GEOMETRY",
+    "IMAGING",
+    "IO",
+    "JCI",
+    "JCS",
+    "JELLY",
+    "JEXL",
+    "JXPATH",
+    "LANG",
+    "LAUNCHER",
+    "LOGGING",
+    "MATH",
+    "MODELER",
+    "NET",
+    "NUMBERS",
+    "OGNL",
+    "POOL",
+    "PRIMITIVES",
+    "PROXY",
+    "RESOURCES",
+    "RNG",
+    "SANDBOX",
+    "SANSELAN",
+    "SCXML",
+    "STATISTICS",
+    "TEXT",
+    "TRANSACTION",
+    "VALIDATOR",
+    "VFS",
+    "WEAVER",
+    "COMDEV",
+    "CONTINUUM",
+    "COR",
+    "COTTON",
+    "COUCHDB",
+    "CRUNCH",
+    "CTAKES",
+    "CUSTOS",
+    "CXF",
+    "DOSGI",
+    "CXFXJC",
+    "FEDIZ",
+    "DAFFODIL",
+    "DATAFU",
+    "DAYTRADER",
+    "DDLUTILS",
+    "DTACLOUD",
+    "DELTASPIKE",
+    "DEPOT",
+    "DERBY",
+    "DMAP",
+    "DIR",
+    "DIRSERVER",
+    "DIRAPI",
+    "DIRGROOVY",
+    "DIRKRB",
+    "DIRNAMING",
+    "DIRSHARED",
+    "DIRSTUDIO",
+    "DL",
+    "DI",
+    "DBF",
+    "DROIDS",
+    "DVSL",
+    "EAGLE",
+    "EASYANT",
+    "ECS",
+    "EDGENT",
+    "EMPIREDB",
+    "ESME",
+    "ESCIMO",
+    "ETCH",
+    "EWS",
+    "EXLBR",
+    "FORTRESS",
+    "FALCON",
+    "FELIX",
+    "FINCN",
+    "FLAGON",
+    "FLINK",
+    "FLUME",
+    "FOP",
+    "FOR",
+    "FC",
+    "FTPSERVER",
+    "GBUILD",
+    "GEODE",
+    "GERONIMO",
+    "GERONIMODEVTOOLS",
+    "GIRAPH",
+    "GOSSIP",
+    "GRFT",
+    "GRIFFIN",
+    "GROOVY",
+    "GSHELL",
+    "GUACAMOLE",
+    "GUMP",
+    "HADOOP",
+    "HDT",
+    "HDFS",
+    "MAPREDUCE",
+    "YARN",
+    "HAMA",
+    "HARMONY",
+    "HBASE",
+    "HCATALOG",
+    "HERALDRY",
+    "HISE",
+    "HIVE",
+    "HIVEMALL",
+    "HIVEMIND",
+    "HTRACE",
+    "HTTPASYNC",
+    "HTTPCLIENT",
+    "HTTPCORE",
+    "IBATISNET",
+    "IBATIS",
+    "RBATIS",
+    "IGNITE",
+    "IMPALA",
+    "IMPERIUS",
+    "INCUBATOR",
+    "INFRATEST3",
+    "INFRATEST987",
+    "INFRACLOUD1",
+    "INFRA",
+    "TEST6",
+    "IOTA",
+    "ISIS",
+    "IVY",
+    "IVYDE",
+    "JCR",
+    "JCRVLT",
+    "JCRBENCH",
+    "JCRCL",
+    "JCRSERVLET",
+    "JCRTCK",
+    "JCRRMI",
+    "OAK",
+    "OCM",
+    "JCRSITE",
+    "HUPA",
+    "IMAP",
+    "JDKIM",
+    "JSIEVE",
+    "JSPF",
+    "MAILBOX",
+    "MAILET",
+    "MIME4J",
+    "MPT",
+    "POSTAGE",
+    "PROTOCOLS",
+    "JAMES",
+    "JAXME",
+    "JCLOUDS",
+    "JDO",
+    "JS1",
+    "JS2",
+    "JOHNZON",
+    "JOSHUA",
+    "JSEC",
+    "JSPWIKI",
+    "JUDDI",
+    "JUNEAU",
+    "KAFKA",
+    "KALUMET",
+    "KAND",
+    "KARAF",
+    "KATO",
+    "KI",
+    "KITTY",
+    "KUDU",
+    "KYLIN",
+    "LABS",
+    "HTTPDRAFT",
+    "LEGAL",
+    "LIBCLOUD",
+    "LIVY",
+    "LOGCXX",
+    "LOG4J2",
+    "LOG4NET",
+    "LOG4PHP",
+    "LOKAHI",
+    "LUCENENET",
+    "LCN4C",
+    "LUCY",
+    "MAHOUT",
+    "CONNECTORS",
+    "MARMOTTA",
+    "MNG",
+    "MACR",
+    "MANT",
+    "MANTTASKS",
+    "MANTRUN",
+    "ARCHETYPE",
+    "MARCHETYPES",
+    "MARTIFACT",
+    "MASSEMBLY",
+    "MBUILDCACHE",
+    "MCHANGELOG",
+    "MCHANGES",
+    "MCHECKSTYLE",
+    "MCLEAN",
+    "MCOMPILER",
+    "MDEP",
+    "MDEPLOY",
+    "MDOAP",
+    "MDOCCK",
+    "DOXIA",
+    "DOXIASITETOOLS",
+    "DOXIATOOLS",
+    "MEAR",
+    "MECLIPSE",
+    "MEJB",
+    "MENFORCER",
+    "MGPG",
+    "MPH",
+    "MINDEXER",
+    "MINSTALL",
+    "MINVOKER",
+    "MJAR",
+    "MJARSIGNER",
+    "MJAVADOC",
+    "MJDEPRSCAN",
+    "MJDEPS",
+    "MJLINK",
+    "MJMOD",
+    "JXR",
+    "MLINKCHECK",
+    "MPATCH",
+    "MPDF",
+    "MPLUGINTESTING",
+    "MPLUGIN",
+    "MPMD",
+    "MPOM",
+    "MPIR",
+    "MNGSITE",
+    "MRAR",
+    "MRELEASE",
+    "MRRESOURCES",
+    "MREPOSITORY",
+    "MRESOLVER",
+    "MRESOURCES",
+    "SCM",
+    "MSCMPUB",
+    "MSCRIPTING",
+    "MSHADE",
+    "MSHARED",
+    "MSITE",
+    "MSKINS",
+    "MSOURCES",
+    "MSTAGE",
+    "SUREFIRE",
+    "MTOOLCHAINS",
+    "MVERIFIER",
+    "WAGON",
+    "MWAR",
+    "MWRAPPER",
+    "MAVIBOT",
+    "MEECROWAVE",
+    "MESOS",
+    "METRON",
+    "MILAGRO",
+    "DIRMINA",
+    "SSHD",
+    "MIRAE",
+    "MNEMONIC",
+    "MODPYTHON",
+    "MRQL",
+    "MRUNIT",
+    "MUSE",
+    "MXNETTEST",
+    "ADFFACES",
+    "EXTCDI",
+    "MFCOMMONS",
+    "MYFACES",
+    "EXTSCRIPT",
+    "EXTVAL",
+    "MFHTML5",
+    "ORCHESTRA",
+    "PORTLETBRIDGE",
+    "MYFACESTEST",
+    "TOBAGO",
+    "TOMAHAWK",
+    "TRINIDAD",
+    "MYNEWT",
+    "MYNEWTDOC",
+    "MYRIAD",
+    "NEETHI",
+    "NETBEANS",
+    "NIFIREG",
+    "NIFILIBS",
+    "NLPCRAFT",
+    "NPANDAY",
+    "NUTCH",
+    "NUVEM",
+    "ODE",
+    "JACOB",
+    "OWC",
+    "ODFTOOLKIT",
+    "OFBIZ",
+    "OJB",
+    "OLINGO",
+    "OLIO",
+    "OODT",
+    "OOZIE",
+    "ORP",
+    "OPENEJB",
+    "OEP",
+    "OPENJPA",
+    "OPENMEETINGS",
+    "OPENNLP",
+    "OPENOFFICE",
+    "OWB",
+    "ORC",
+    "PARQUET",
+    "PDFBOX",
+    "PHOENIX",
+    "OMID",
+    "TEPHRA",
+    "PHOTARK",
+    "PIG",
+    "PIRK",
+    "PIVOT",
+    "PLUTO",
+    "PODLINGNAMESEARCH",
+    "POLYGENE",
+    "PORTALS",
+    "APA",
+    "PB",
+    "PIO",
+    "PROVISIONR",
+    "PRC",
+    "HERMES",
+    "PULSAR",
+    "PYLUCENE",
+    "QPID",
+    "DISPATCH",
+    "QPIDJMS",
+    "PROTON",
+    "RAMPART",
+    "RAMPARTC",
+    "RANGER",
+    "RATIS",
+    "RAVE",
+    "REEF",
+    "RIVER",
+    "RYA",
+    "S2GRAPH",
+    "SAMOA",
+    "SAMZA",
+    "SAND",
+    "SANDESHA2",
+    "SANDESHA2C",
+    "SANTUARIO",
+    "SAVAN",
+    "SCOUT",
+    "SENTRY",
+    "SERF",
+    "SM",
+    "SMX4",
+    "SMXCOMP",
+    "SMX4KNL",
+    "SMX4NMR",
+    "SHALE",
+    "SHINDIG",
+    "SHIRO",
+    "CASSANDRASC",
+    "SINGA",
+    "SIRONA",
+    "SLIDER",
+    "SLING",
+    "SOAP",
+    "SOLR",
+    "SPARK",
+    "SIS",
+    "SPOT",
+    "SQOOP",
+    "STANBOL",
+    "STEVE",
+    "STOMP",
+    "STONEHENGE",
+    "STRATOS",
+    "STREAMPIPES",
+    "STREAMS",
+    "STR",
+    "WW",
+    "SB",
+    "SITE",
+    "SVN",
+    "SUPERSET",
+    "SYNAPSE",
+    "SYNCOPE",
+    "SYSTEMDS",
+    "TAJO",
+    "TAMAYA",
+    "TAPESTRY",
+    "TAP5",
+    "TASHI",
+    "TST",
+    "TEXEN",
+    "MMETRIC",
+    "THRIFT",
+    "TIKA",
+    "TILES",
+    "AUTOTAG",
+    "TEVAL",
+    "TREQ",
+    "TILESSB",
+    "TILESSHARED",
+    "TILESSHOW",
+    "TINKERPOP",
+    "TOMEE",
+    "TATPI",
+    "TOREE",
+    "TORQUE",
+    "TORQUEOLD",
+    "TC",
+    "TS",
+    "DIRTSEC",
+    "TRIPLES",
+    "TSIK",
+    "TRB",
+    "TUSCANY",
+    "TUWENI",
+    "UIMA",
+    "USERGRID",
+    "VCL",
+    "VELOCITY",
+    "VELOCITYSB",
+    "VELTOOLS",
+    "VXQUERY",
+    "VYSPER",
+    "WADI",
+    "WAVE",
+    "WEEX",
+    "WHIMSY",
+    "WICKET",
+    "WINK",
+    "WODEN",
+    "WOOKIE",
+    "WSCOMMONS",
+    "APOLLO",
+    "WSRP4J",
+    "WSS",
+    "ASFSITE",
+    "XALANC",
+    "XALANJ",
+    "XAP",
+    "XBEAN",
+    "XERCESC",
+    "XERCESP",
+    "XERCESJ",
+    "XMLCOMMONS",
+    "XMLRPC",
+    "XMLBEANS",
+    "XGC",
+    "XMLSCHEMA",
+    "XW",
+    "YETUS",
+    "YOKO",
+    "ZEPPELIN",
+    "ZETACOMP",
+    "ZOOKEEPER"
+])
diff --git a/migration/src/remap_cross_issue_links.py b/migration/src/remap_cross_issue_links.py
index 18e66ef..a3d4fbc 100644
--- a/migration/src/remap_cross_issue_links.py
+++ b/migration/src/remap_cross_issue_links.py
@@ -13,7 +13,7 @@
 
 from common import LOG_DIRNAME, MAPPINGS_DATA_DIRNAME, ISSUE_MAPPING_FILENAME, GITHUB_REMAPPED_DATA_DIRNAME, MaxRetryLimitExceedException, logging_setup, read_issue_id_map, retry_upto, github_remapped_issue_data_file, github_remapped_comment_data_file
 from github_issues_util import *
-from jira_util import embed_gh_issue_link
+from jira_util import create_issue_links_outside_projects, embed_gh_issue_link
 
 
 log_dir = Path(__file__).resolve().parent.parent.joinpath(LOG_DIRNAME)
@@ -25,6 +25,7 @@
     body = get_issue_body(token, repo, issue_number, logger)
     if body:
         updated_body = embed_gh_issue_link(body, issue_id_map, issue_number)
+        updated_body = create_issue_links_outside_projects(updated_body)  # chain on the previous result so the GitHub links embedded above are kept
         if updated_body == body:
             logger.debug(f"Issue {issue_number} does not contain any cross-issue links; nothing to do.")
             return
@@ -45,6 +46,7 @@
         id = comment.id
         body = comment.body
         updated_body = embed_gh_issue_link(body, issue_id_map, issue_number)
+        updated_body = create_issue_links_outside_projects(updated_body)  # chain on the previous result so the GitHub links embedded above are kept
         if updated_body == body:
             logger.debug(f"Comment {id} does not contain any cross-issue links; nothing to do.")
             continue