Create hyperlinks to other ASF projects for PRJ-XXXX style cross-issue links. (#131)
diff --git a/migration/src/jira_util.py b/migration/src/jira_util.py
index 9ee2682..f1bffea 100644
--- a/migration/src/jira_util.py
+++ b/migration/src/jira_util.py
@@ -341,3 +341,699 @@
text = re.sub(r"\[(LUCENE-\d+)\]\(https?[^\)]+LUCENE-\d+\)", repl_md_link, text)
return text
+
+
+# Base URL of an ASF Jira issue page; an issue key such as "PRJ-1234" is
+# appended to it to build the full link.
+ASF_JIRA_BASE = "https://issues.apache.org/jira/browse/"
+
+def create_issue_links_outside_projects(text: str) -> str:
+    """Turn Jira issue keys of other ASF projects into Markdown links.
+
+    A key has the shape ``PRJ-1234`` and is linked only when ``PRJ`` is listed
+    in ``ALL_JIRA_PROJECTS``; any other match is left exactly as it was.
+    Three spellings are recognised:
+
+    * a plain key at the start of the text or after whitespace, followed by
+      whitespace, one of ``,;:?!.`` or the end of the text;
+    * a key wrapped in parentheses, ``(PRJ-1234)``;
+    * a key wrapped in square brackets, ``[PRJ-1234]``, unless it is directly
+      followed by ``(`` (i.e. it already is the label of a Markdown link).
+
+    Args:
+        text: Markdown text to scan.
+
+    Returns:
+        The text with every recognised key replaced by
+        ``[PRJ-1234](<ASF_JIRA_BASE>PRJ-1234)``.
+    """
+    key = r"([A-Z0-9]{2,20}-\d+)"
+
+    def md_link(issue_key: str):
+        """Return the Markdown link for the key, or None for unknown projects."""
+        if issue_key.split("-")[0] not in ALL_JIRA_PROJECTS:
+            return None
+        return f"[{issue_key}]({ASF_JIRA_BASE}{issue_key})"
+
+    def repl_simple(m: re.Match) -> str:
+        link = md_link(m.group(2))
+        if link is None:
+            return m.group(0)
+        # Group 1 is the leading whitespace (or "" at the start of the text).
+        return f"{m.group(1)}{link}"
+
+    def repl_paren(m: re.Match) -> str:
+        link = md_link(m.group(2))
+        if link is None:
+            return m.group(0)
+        return f"{m.group(1)}{link}{m.group(3)}"
+
+    def repl_bracket(m: re.Match) -> str:
+        link = md_link(m.group(2))
+        # The brackets of "[PRJ-1234]" become the label of the Markdown link,
+        # so the key stays visible instead of being replaced by a bare URL.
+        return m.group(0) if link is None else link
+
+    # The trailing delimiter is only looked at, not consumed: otherwise the
+    # whitespace between two adjacent keys would be eaten by the first match
+    # and the second key would be skipped. "|$" also links a key that ends
+    # the text.
+    text = re.sub(r"(^|\s)" + key + r"(?=[\s,;:\?\!\.]|$)", repl_simple, text)
+    text = re.sub(r"(\()" + key + r"(\))", repl_paren, text)
+    # (?!\() skips labels of existing links, including those created above.
+    text = re.sub(r"(\[)" + key + r"(\])(?!\()", repl_bracket, text)
+
+    return text
+
+
+# Jira project keys for which "PRJ-1234" style references are turned into
+# links by create_issue_links_outside_projects(); keys whose project part is
+# not listed here are left untouched.
+ALL_JIRA_PROJECTS = {
+    "AAR",
+    "ABDERA",
+    "ACCUMULO",
+    "ACE",
+    "ACL",
+    "AMQ",
+    "AMQNET",
+    "APLO",
+    "ARTEMIS",
+    "AMQCPP",
+    "AMQCLI",
+    "OPENWIRE",
+    "BLAZE",
+    "ADDR",
+    "AGILA",
+    "AIRAVATA",
+    "ALOIS",
+    "ARMI",
+    "AMATERASU",
+    "AMBARI",
+    "AMBER",
+    "ANAKIA",
+    "AGE2",
+    "AGEOLD",
+    "ANY23",
+    "APEXCORE",
+    "APEXMALHAR",
+    "ARROW",
+    "ASTERIXDB",
+    "AVRO",
+    "AWF",
+    "BLUEMARLIN",
+    "BLUR",
+    "CHAINSAW",
+    "CMDA",
+    "COMMONSSITE",
+    "COMMONSRDF",
+    "TESTING",
+    "CONCERTED",
+    "CORAL",
+    "CB",
+    "CRAIL",
+    "CURATOR",
+    "DATALAB",
+    "DATASKETCHES",
+    "DIRECTMEMORY",
+    "DORIS",
+    "DRILL",
+    "DUBBO",
+    "ECHARTS",
+    "EVENTMESH",
+    "FINERACT",
+    "FLEX",
+    "FREEMARKER",
+    "GEARPUMP",
+    "GOBBLIN",
+    "GORA",
+    "HAWQ",
+    "HELIX",
+    "HOP",
+    "HORN",
+    "HUDI",
+    "INLONG",
+    "IOTDB",
+    "JENA",
+    "KNOX",
+    "LENS",
+    "LIMINAL",
+    "LINKIS",
+    "CLOWNFISH",
+    "MADLIB",
+    "MARVIN",
+    "MASFRES",
+    "METAMODEL",
+    "MXNET",
+    "NEMO",
+    "NETBEANSINFRA",
+    "NIFI",
+    "MINIFI",
+    "MINIFICPP",
+    "NUTTX",
+    "OLTU",
+    "ONAMI",
+    "CLIMATE",
+    "OPENAZ",
+    "HDDS",
+    "PEGASUS",
+    "PETRI",
+    "PINOT",
+    "PLC4X",
+    "QPIDIT",
+    "QUICKSTEP",
+    "RAT",
+    "RIPPLE",
+    "ROCKETMQ",
+    "ROL",
+    "S4",
+    "SDAP",
+    "SCIMPLE",
+    "SEDONA",
+    "SCB",
+    "STORM",
+    "SUBMARINE",
+    "TAVERNA",
+    "TENTACLES",
+    "TEZ",
+    "MTOMCAT",
+    "TRAFODION",
+    "TRAINING",
+    "TWILL",
+    "UNOMI",
+    "WAYANG",
+    "WHIRR",
+    "WHISKER",
+    "YUNIKORN",
+    "ZIPKIN",
+    "APISIX",
+    "MRM",
+    "ARIA",
+    "ARIES",
+    "ASYNCWEB",
+    "ATLAS",
+    "ATTIC",
+    "AURORA",
+    "AVALON",
+    "AVNSHARP",
+    "RUNTIME",
+    "STUDIO",
+    "CENTRAL",
+    "PLANET",
+    "TOOLS",
+    "PNIX",
+    "AXIOM",
+    "AXIS",
+    "AXISCPP",
+    "WSIF",
+    "AXIS2",
+    "TRANSPORTS",
+    "AXIS2C",
+    "BAHIR",
+    "BATCHEE",
+    "BATIK",
+    "BEAM",
+    "BEEHIVE",
+    "BIGTOP",
+    "BLUESKY",
+    "BOOKKEEPER",
+    "TM",
+    "BROOKLYN",
+    "BUILDR",
+    "BVAL",
+    "STDCXX",
+    "CACTUS",
+    "CALCITE",
+    "CAMEL",
+    "CARBONDATA",
+    "CASSANDRA",
+    "CAY",
+    "CELIX",
+    "CS",
+    "CMIS",
+    "CHUKWA",
+    "CLEREZZA",
+    "CLK",
+    "CLKE",
+    "CLOUDSTACK",
+    "COCOON",
+    "COCOON3",
+    "ATTRIBUTES",
+    "BCEL",
+    "BEANUTILS",
+    "BETWIXT",
+    "BSF",
+    "CHAIN",
+    "CLI",
+    "CODEC",
+    "COLLECTIONS",
+    "COMPRESS",
+    "CONFIGURATION",
+    "CRYPTO",
+    "CSV",
+    "DAEMON",
+    "DBCP",
+    "DBUTILS",
+    "DIGESTER",
+    "DISCOVERY",
+    "DORMANT",
+    "EL",
+    "EMAIL",
+    "EXEC",
+    "FEEDPARSER",
+    "FILEUPLOAD",
+    "FUNCTOR",
+    "GEOMETRY",
+    "IMAGING",
+    "IO",
+    "JCI",
+    "JCS",
+    "JELLY",
+    "JEXL",
+    "JXPATH",
+    "LANG",
+    "LAUNCHER",
+    "LOGGING",
+    "MATH",
+    "MODELER",
+    "NET",
+    "NUMBERS",
+    "OGNL",
+    "POOL",
+    "PRIMITIVES",
+    "PROXY",
+    "RESOURCES",
+    "RNG",
+    "SANDBOX",
+    "SANSELAN",
+    "SCXML",
+    "STATISTICS",
+    "TEXT",
+    "TRANSACTION",
+    "VALIDATOR",
+    "VFS",
+    "WEAVER",
+    "COMDEV",
+    "CONTINUUM",
+    "COR",
+    "COTTON",
+    "COUCHDB",
+    "CRUNCH",
+    "CTAKES",
+    "CUSTOS",
+    "CXF",
+    "DOSGI",
+    "CXFXJC",
+    "FEDIZ",
+    "DAFFODIL",
+    "DATAFU",
+    "DAYTRADER",
+    "DDLUTILS",
+    "DTACLOUD",
+    "DELTASPIKE",
+    "DEPOT",
+    "DERBY",
+    "DMAP",
+    "DIR",
+    "DIRSERVER",
+    "DIRAPI",
+    "DIRGROOVY",
+    "DIRKRB",
+    "DIRNAMING",
+    "DIRSHARED",
+    "DIRSTUDIO",
+    "DL",
+    "DI",
+    "DBF",
+    "DROIDS",
+    "DVSL",
+    "EAGLE",
+    "EASYANT",
+    "ECS",
+    "EDGENT",
+    "EMPIREDB",
+    "ESME",
+    "ESCIMO",
+    "ETCH",
+    "EWS",
+    "EXLBR",
+    "FORTRESS",
+    "FALCON",
+    "FELIX",
+    "FINCN",
+    "FLAGON",
+    "FLINK",
+    "FLUME",
+    "FOP",
+    "FOR",
+    "FC",
+    "FTPSERVER",
+    "GBUILD",
+    "GEODE",
+    "GERONIMO",
+    "GERONIMODEVTOOLS",
+    "GIRAPH",
+    "GOSSIP",
+    "GRFT",
+    "GRIFFIN",
+    "GROOVY",
+    "GSHELL",
+    "GUACAMOLE",
+    "GUMP",
+    "HADOOP",
+    "HDT",
+    "HDFS",
+    "MAPREDUCE",
+    "YARN",
+    "HAMA",
+    "HARMONY",
+    "HBASE",
+    "HCATALOG",
+    "HERALDRY",
+    "HISE",
+    "HIVE",
+    "HIVEMALL",
+    "HIVEMIND",
+    "HTRACE",
+    "HTTPASYNC",
+    "HTTPCLIENT",
+    "HTTPCORE",
+    "IBATISNET",
+    "IBATIS",
+    "RBATIS",
+    "IGNITE",
+    "IMPALA",
+    "IMPERIUS",
+    "INCUBATOR",
+    "INFRATEST3",
+    "INFRATEST987",
+    "INFRACLOUD1",
+    "INFRA",
+    "TEST6",
+    "IOTA",
+    "ISIS",
+    "IVY",
+    "IVYDE",
+    "JCR",
+    "JCRVLT",
+    "JCRBENCH",
+    "JCRCL",
+    "JCRSERVLET",
+    "JCRTCK",
+    "JCRRMI",
+    "OAK",
+    "OCM",
+    "JCRSITE",
+    "HUPA",
+    "IMAP",
+    "JDKIM",
+    "JSIEVE",
+    "JSPF",
+    "MAILBOX",
+    "MAILET",
+    "MIME4J",
+    "MPT",
+    "POSTAGE",
+    "PROTOCOLS",
+    "JAMES",
+    "JAXME",
+    "JCLOUDS",
+    "JDO",
+    "JS1",
+    "JS2",
+    "JOHNZON",
+    "JOSHUA",
+    "JSEC",
+    "JSPWIKI",
+    "JUDDI",
+    "JUNEAU",
+    "KAFKA",
+    "KALUMET",
+    "KAND",
+    "KARAF",
+    "KATO",
+    "KI",
+    "KITTY",
+    "KUDU",
+    "KYLIN",
+    "LABS",
+    "HTTPDRAFT",
+    "LEGAL",
+    "LIBCLOUD",
+    "LIVY",
+    "LOGCXX",
+    "LOG4J2",
+    "LOG4NET",
+    "LOG4PHP",
+    "LOKAHI",
+    "LUCENENET",
+    "LCN4C",
+    "LUCY",
+    "MAHOUT",
+    "CONNECTORS",
+    "MARMOTTA",
+    "MNG",
+    "MACR",
+    "MANT",
+    "MANTTASKS",
+    "MANTRUN",
+    "ARCHETYPE",
+    "MARCHETYPES",
+    "MARTIFACT",
+    "MASSEMBLY",
+    "MBUILDCACHE",
+    "MCHANGELOG",
+    "MCHANGES",
+    "MCHECKSTYLE",
+    "MCLEAN",
+    "MCOMPILER",
+    "MDEP",
+    "MDEPLOY",
+    "MDOAP",
+    "MDOCCK",
+    "DOXIA",
+    "DOXIASITETOOLS",
+    "DOXIATOOLS",
+    "MEAR",
+    "MECLIPSE",
+    "MEJB",
+    "MENFORCER",
+    "MGPG",
+    "MPH",
+    "MINDEXER",
+    "MINSTALL",
+    "MINVOKER",
+    "MJAR",
+    "MJARSIGNER",
+    "MJAVADOC",
+    "MJDEPRSCAN",
+    "MJDEPS",
+    "MJLINK",
+    "MJMOD",
+    "JXR",
+    "MLINKCHECK",
+    "MPATCH",
+    "MPDF",
+    "MPLUGINTESTING",
+    "MPLUGIN",
+    "MPMD",
+    "MPOM",
+    "MPIR",
+    "MNGSITE",
+    "MRAR",
+    "MRELEASE",
+    "MRRESOURCES",
+    "MREPOSITORY",
+    "MRESOLVER",
+    "MRESOURCES",
+    "SCM",
+    "MSCMPUB",
+    "MSCRIPTING",
+    "MSHADE",
+    "MSHARED",
+    "MSITE",
+    "MSKINS",
+    "MSOURCES",
+    "MSTAGE",
+    "SUREFIRE",
+    "MTOOLCHAINS",
+    "MVERIFIER",
+    "WAGON",
+    "MWAR",
+    "MWRAPPER",
+    "MAVIBOT",
+    "MEECROWAVE",
+    "MESOS",
+    "METRON",
+    "MILAGRO",
+    "DIRMINA",
+    "SSHD",
+    "MIRAE",
+    "MNEMONIC",
+    "MODPYTHON",
+    "MRQL",
+    "MRUNIT",
+    "MUSE",
+    "MXNETTEST",
+    "ADFFACES",
+    "EXTCDI",
+    "MFCOMMONS",
+    "MYFACES",
+    "EXTSCRIPT",
+    "EXTVAL",
+    "MFHTML5",
+    "ORCHESTRA",
+    "PORTLETBRIDGE",
+    "MYFACESTEST",
+    "TOBAGO",
+    "TOMAHAWK",
+    "TRINIDAD",
+    "MYNEWT",
+    "MYNEWTDOC",
+    "MYRIAD",
+    "NEETHI",
+    "NETBEANS",
+    "NIFIREG",
+    "NIFILIBS",
+    "NLPCRAFT",
+    "NPANDAY",
+    "NUTCH",
+    "NUVEM",
+    "ODE",
+    "JACOB",
+    "OWC",
+    "ODFTOOLKIT",
+    "OFBIZ",
+    "OJB",
+    "OLINGO",
+    "OLIO",
+    "OODT",
+    "OOZIE",
+    "ORP",
+    "OPENEJB",
+    "OEP",
+    "OPENJPA",
+    "OPENMEETINGS",
+    "OPENNLP",
+    "OPENOFFICE",
+    "OWB",
+    "ORC",
+    "PARQUET",
+    "PDFBOX",
+    "PHOENIX",
+    "OMID",
+    "TEPHRA",
+    "PHOTARK",
+    "PIG",
+    "PIRK",
+    "PIVOT",
+    "PLUTO",
+    "PODLINGNAMESEARCH",
+    "POLYGENE",
+    "PORTALS",
+    "APA",
+    "PB",
+    "PIO",
+    "PROVISIONR",
+    "PRC",
+    "HERMES",
+    "PULSAR",
+    "PYLUCENE",
+    "QPID",
+    "DISPATCH",
+    "QPIDJMS",
+    "PROTON",
+    "RAMPART",
+    "RAMPARTC",
+    "RANGER",
+    "RATIS",
+    "RAVE",
+    "REEF",
+    "RIVER",
+    "RYA",
+    "S2GRAPH",
+    "SAMOA",
+    "SAMZA",
+    "SAND",
+    "SANDESHA2",
+    "SANDESHA2C",
+    "SANTUARIO",
+    "SAVAN",
+    "SCOUT",
+    "SENTRY",
+    "SERF",
+    "SM",
+    "SMX4",
+    "SMXCOMP",
+    "SMX4KNL",
+    "SMX4NMR",
+    "SHALE",
+    "SHINDIG",
+    "SHIRO",
+    "CASSANDRASC",
+    "SINGA",
+    "SIRONA",
+    "SLIDER",
+    "SLING",
+    "SOAP",
+    "SOLR",
+    "SPARK",
+    "SIS",
+    "SPOT",
+    "SQOOP",
+    "STANBOL",
+    "STEVE",
+    "STOMP",
+    "STONEHENGE",
+    "STRATOS",
+    "STREAMPIPES",
+    "STREAMS",
+    "STR",
+    "WW",
+    "SB",
+    "SITE",
+    "SVN",
+    "SUPERSET",
+    "SYNAPSE",
+    "SYNCOPE",
+    "SYSTEMDS",
+    "TAJO",
+    "TAMAYA",
+    "TAPESTRY",
+    "TAP5",
+    "TASHI",
+    "TST",
+    "TEXEN",
+    "MMETRIC",
+    "THRIFT",
+    "TIKA",
+    "TILES",
+    "AUTOTAG",
+    "TEVAL",
+    "TREQ",
+    "TILESSB",
+    "TILESSHARED",
+    "TILESSHOW",
+    "TINKERPOP",
+    "TOMEE",
+    "TATPI",
+    "TOREE",
+    "TORQUE",
+    "TORQUEOLD",
+    "TC",
+    "TS",
+    "DIRTSEC",
+    "TRIPLES",
+    "TSIK",
+    "TRB",
+    "TUSCANY",
+    "TUWENI",
+    "UIMA",
+    "USERGRID",
+    "VCL",
+    "VELOCITY",
+    "VELOCITYSB",
+    "VELTOOLS",
+    "VXQUERY",
+    "VYSPER",
+    "WADI",
+    "WAVE",
+    "WEEX",
+    "WHIMSY",
+    "WICKET",
+    "WINK",
+    "WODEN",
+    "WOOKIE",
+    "WSCOMMONS",
+    "APOLLO",
+    "WSRP4J",
+    "WSS",
+    "ASFSITE",
+    "XALANC",
+    "XALANJ",
+    "XAP",
+    "XBEAN",
+    "XERCESC",
+    "XERCESP",
+    "XERCESJ",
+    "XMLCOMMONS",
+    "XMLRPC",
+    "XMLBEANS",
+    "XGC",
+    "XMLSCHEMA",
+    "XW",
+    "YETUS",
+    "YOKO",
+    "ZEPPELIN",
+    "ZETACOMP",
+    "ZOOKEEPER",
+}
diff --git a/migration/src/remap_cross_issue_links.py b/migration/src/remap_cross_issue_links.py
index 18e66ef..a3d4fbc 100644
--- a/migration/src/remap_cross_issue_links.py
+++ b/migration/src/remap_cross_issue_links.py
@@ -13,7 +13,7 @@
from common import LOG_DIRNAME, MAPPINGS_DATA_DIRNAME, ISSUE_MAPPING_FILENAME, GITHUB_REMAPPED_DATA_DIRNAME, MaxRetryLimitExceedException, logging_setup, read_issue_id_map, retry_upto, github_remapped_issue_data_file, github_remapped_comment_data_file
from github_issues_util import *
-from jira_util import embed_gh_issue_link
+from jira_util import create_issue_links_outside_projects, embed_gh_issue_link
log_dir = Path(__file__).resolve().parent.parent.joinpath(LOG_DIRNAME)
@@ -25,6 +25,7 @@
body = get_issue_body(token, repo, issue_number, logger)
if body:
updated_body = embed_gh_issue_link(body, issue_id_map, issue_number)
+ # Chain on updated_body, not body: passing body again would throw away the
+ # links that embed_gh_issue_link() has just produced.
+ updated_body = create_issue_links_outside_projects(updated_body)
if updated_body == body:
logger.debug(f"Issue {issue_number} does not contain any cross-issue links; nothing to do.")
return
@@ -45,6 +46,7 @@
id = comment.id
body = comment.body
updated_body = embed_gh_issue_link(body, issue_id_map, issue_number)
+ # Chain on updated_body, not body: passing body again would throw away the
+ # links that embed_gh_issue_link() has just produced.
+ updated_body = create_issue_links_outside_projects(updated_body)
if updated_body == body:
logger.debug(f"Comment {id} does not contain any cross-issue links; nothing to do.")
continue