blob: 91ef0cb17f789701170856e4cc65efc18f43edb8 [file] [log] [blame]
$LOAD_PATH.unshift '/srv/whimsy/lib'
require 'find'
require 'weakref'
require 'whimsy/asf/config'
module ASF
module MLIST
# utility methods for handling mailing list attributes:
# - subscriptions
# - moderation
# whilst the source files are not particularly difficult to parse, it makes
# sense to centralise access so any necessary changes can be localised
# Note that email matching is case-insensitive, but the original case is returned
# list and domain names are always returned as lower-case
# Potentially also the methods could check if access was allowed.
# This is currently done by the callers
# Note that the data files don't provide information on whether a list is
# public or private.
# Cache bookkeeping for list_parse; both hashes are keyed by the file type
# ('sub', 'mod' or 'digest').
@@file_times = {} # value = LIST_TIME mtime (integer seconds) recorded when the type was last parsed
@@file_parsed = {} # value = WeakRef to the array of [dom, list, mails] entries, or nil
# Subscribers of board@apache.org.
# archivers: passed through to list_filter; when true (the default) archiver addresses are kept
# Returns a pair: [subscribers, mtime of LIST_TIME]
# (subscribers is whatever list_filter returns, i.e. nil if there is no sub file)
def self.board_subscribers(archivers=true)
  subscribers = list_filter('sub', 'apache.org', 'board', archivers)
  [subscribers, File.mtime(LIST_TIME)]
end
# Subscribers of members@apache.org.
# archivers: passed through to list_filter; when true (the default) archiver addresses are kept
# Returns a pair: [subscribers, mtime of LIST_TIME]
def self.members_subscribers(archivers=true)
  subscribers = list_filter('sub', 'apache.org', 'members', archivers)
  [subscribers, File.mtime(LIST_TIME)]
end
# Subscribers of members-notify@apache.org.
# archivers: passed through to list_filter; when true (the default) archiver addresses are kept
# Returns a pair: [subscribers, mtime of LIST_TIME]
def self.members_notify_subscribers(archivers=true)
  subscribers = list_filter('sub', 'apache.org', 'members-notify', archivers)
  [subscribers, File.mtime(LIST_TIME)]
end
# Subscribers (normal plus digest) of a private list, with the data timestamp.
# pmc is either:
# - a pmc name, in which case private@<pmc>.apache.org is used, or
# - a full list name in apache.org or apachecon.com, e.g. w3c@apache.org
# archivers: when false (the default) the standard archivers are left out
# Returns [subscribers, mtime of LIST_TIME]; subscribers is nil if neither
# the sub nor the digest file exists.
# Returns a bare [] if the LIST_BASE directory does not exist.
# Raises RuntimeError if pmc is not in one of the two accepted forms.
def self.private_subscribers(pmc, archivers=false)
  return [] unless Dir.exist? LIST_BASE

  pieces = pmc.split('@', 3) # limit of 3 so that a trailing '@' is detected
  if pieces.length == 2 && %w{apache.org apachecon.com}.include?(pieces.last)
    list, dom = pieces
  elsif pieces.length == 1
    list = 'private'
    dom = "#{pmc}.apache.org"
  else
    raise "Unexpected parameter: #{pmc}"
  end

  subs = list_filter('sub', dom, list, archivers)
  digs = list_filter('digest', dom, list, archivers)
  # merge only when both exist; otherwise use whichever is present (may be nil)
  merged = (subs && digs) ? (subs + digs).uniq : (subs || digs)
  [merged, File.mtime(LIST_TIME)]
end
# Subscribers (normal plus digest) of security@<pmc>.apache.org, with the data timestamp.
# archivers: when false (the default) the standard archivers are left out
# Returns [subscribers, mtime of LIST_TIME]; subscribers is nil if neither
# the sub nor the digest file exists.
# Returns a bare [] if the LIST_BASE directory does not exist.
def self.security_subscribers(pmc, archivers=false)
  return [] unless Dir.exist? LIST_BASE

  dom = "#{pmc}.apache.org"
  subs, digs = %w[sub digest].map { |type| list_filter(type, dom, 'security', archivers) }
  # merge only when both exist; otherwise use whichever is present (may be nil)
  merged = (subs && digs) ? (subs + digs).uniq : (subs || digs)
  [merged, File.mtime(LIST_TIME)]
end
# Find the list subscriptions held by any of the given emails.
# Matching is done on the down-cased, canonicalised address; the address is
# reported exactly as it appears in the subscriber file.
# The following keys are set in the response hash (which is also returned):
# :subscriptions - an array of pairs: [list name, subscriber email]
# :subtime - the timestamp when the data was last updated
# N.B. not the same format as the moderates() method
def self.subscriptions(emails, response = {})
  response[:subscriptions] = []
  response[:subtime] = File.mtime(LIST_TIME)
  wanted = emails.map { |addr| ASF::Mail.to_canonical(addr.downcase) }
  list_parse('sub') do |dom, list, subs|
    matched = subs.select { |sub| wanted.include?(ASF::Mail.to_canonical(sub.downcase)) }
    matched.each { |sub| response[:subscriptions] << ["#{list}@#{dom}", sub] }
  end
  response
end
# Find the digest subscriptions held by any of the given emails.
# Matching is done on the down-cased, canonicalised address; the address is
# reported exactly as it appears in the digest file.
# The following keys are set in the response hash (which is also returned):
# :digests - an array of pairs: [list name, subscriber email]
# :digtime - the timestamp when the data was last updated
# N.B. not the same format as the moderates() method
def self.digests(emails, response = {})
  response[:digests] = []
  response[:digtime] = File.mtime(LIST_TIME)
  wanted = emails.map { |addr| ASF::Mail.to_canonical(addr.downcase) }
  list_parse('dig') do |dom, list, subs|
    matched = subs.select { |sub| wanted.include?(ASF::Mail.to_canonical(sub.downcase)) }
    matched.each { |sub| response[:digests] << ["#{list}@#{dom}", sub] }
  end
  response
end
# Find the mailing lists that are moderated by any of the given emails.
# Matching is done on the down-cased, canonicalised address.
# The following keys are set in the response hash (which is also returned):
# :moderates - a hash. key: list name; value: array of the list's moderator emails that matched
# :modtime - the timestamp when the data was last updated
# Lists with no matching moderator are not included.
# N.B. not the same format as the subscriptions() method
def self.moderates(user_emails, response = {})
  response[:moderates] = {}
  response[:modtime] = File.mtime(LIST_TIME)
  # canonicalise the lookup addresses once, outside the scan
  wanted = user_emails.map { |addr| ASF::Mail.to_canonical(addr.downcase) }
  list_parse('mod') do |dom, list, emails|
    hits = emails.select { |mod| wanted.include?(ASF::Mail.to_canonical(mod.downcase)) }
    next if hits.empty?

    response[:moderates]["#{list}@#{dom}"] = hits
  end
  response
end
# Helper for matching mod and subs entries: does the list identified by
# dom/list belong to the target mail_domain?
# It matches in any of these cases:
# - normal tlp style (now also podlings): dom is {mail_domain}.apache.org
#   e.g. /home/apmail/lists/commons.apache.org/dev/mod
# - Apache lists (e.g. some non-PMCs): dom is apache.org and the list is
#   {mail_domain} or starts with {mail_domain}-
#   e.g. /home/apmail/lists/apache.org/list/mod
# - mail_domain is the full list address, e.g. planners@apachecon.com
def self.matches_list?(mail_domain, dom, list)
  return true if "#{mail_domain}.apache.org" == dom

  if dom == 'apache.org'
    return true if list == mail_domain || list.start_with?("#{mail_domain}-")
  end

  "#{list}@#{dom}" == mail_domain
end
# For a mail domain, collect the related lists and their (sorted) moderators.
# Infra test lists (infra-[a-z] anywhere, infra-dev/infra-dev2 at incubator) are dropped.
# If mail_domain is nil, all remaining lists are returned.
# Returns: [{dev@a.o=>[email1, email2]}, mtime of LIST_TIME]
def self.list_moderators(mail_domain)
  found = {}
  list_parse('mod') do |dom, list, mods|
    infra_test = list =~ /^infra-[a-z]$/ ||
                 (dom == 'incubator.apache.org' && list =~ /^infra-dev2?$/)
    unless infra_test
      # does the list match our target?
      if mail_domain.nil? || matches_list?(mail_domain, dom, list)
        found["#{list}@#{dom}"] = mods.sort
      end
    end
  end
  [found, File.mtime(LIST_TIME)]
end
# For a mail domain, collect the related lists and their subscribers
# (by default only the subscriber count).
# N.B. by default archivers are included as subscribers.
# Lists are selected with matches_list?; for top-level apache.org lists the
# mail_domain is therefore either the full list name (e.g. press) or the
# list prefix (e.g. legal).
# Infra test lists (infra-[a-z] anywhere, infra-dev/infra-dev2 at incubator) are dropped.
# Params:
# - _podling: accepted for compatibility; not used by this method
# - list_subs: if true, return the sorted subscriber emails rather than the count
# - skip_archivers: if true, exclude archivers before sorting/counting
# Returns: [{list@dom => emails or count}, mtime of LIST_TIME]
def self.list_subscribers(mail_domain, _podling=false, list_subs=false, skip_archivers=false)
  found = {}
  list_parse('sub') do |dom, list, subs|
    # drop infra test lists
    next if list =~ /^infra-[a-z]$/
    next if dom == 'incubator.apache.org' && list =~ /^infra-dev2?$/
    # does the list match our target?
    next unless matches_list?(mail_domain, dom, list)

    kept = skip_archivers ? subs.reject { |addr| is_archiver?(addr) } : subs
    found["#{list}@#{dom}"] = list_subs ? kept.sort : kept.size
  end
  [found, File.mtime(LIST_TIME)]
end
# Same as list_subscribers, but known archivers are always excluded:
# the arguments are forwarded with skip_archivers set to true.
# Params:
# - mail_domain: see list_subscribers
# - podling: forwarded unchanged to list_subscribers
# - list_subs: if true, return subscriber emails rather than the count
# Returns whatever list_subscribers returns
def self.list_subs(mail_domain, podling=false, list_subs=false)
  skip_archivers = true
  list_subscribers(mail_domain, podling, list_subs, skip_archivers)
end
# Returns the modification time of the LIST_TIME marker file
def self.list_time
  marker = LIST_TIME
  File.mtime(marker)
end
# Scan the subscriber files and yield, for every list, its archiver subscribers.
# Yields a single array: [dom, list, archivers] where each archiver entry is
# [email, *archiver_type(email, dom, list)]
def self.list_archivers
  list_parse('sub') do |dom, list, subs|
    tagged = subs.select { |addr| is_archiver?(addr) }
                 .map { |addr| [addr, *archiver_type(addr, dom, list)] }
    yield [dom, list, tagged]
  end
end
# Yield [domain, list, types] for entries in the subscriber listings; the
# subscribers themselves are not included.
# types is the sorted, comma-joined set of archiver privacy markers found for
# the list (e.g. 'public', 'private'), ignoring 'alias' and 'direct'.
# show_all: if false (the default) only lists whose types is exactly 'public' are yielded
def self.list_types(show_all=false)
  list_archivers do |dom, list, subs|
    privacy = subs.map { |sub| sub[2] }
                  .reject { |kind| %w(alias direct).include? kind }
                  .uniq.sort.join(',')
    yield [dom, list, privacy] if show_all || privacy == 'public'
  end
end
# Return a hash of the lists for a project, together with their privacy setting.
# project - the name used to select lists (see matches_list?), e.g. the tlp prefix of the full domain
# show_all - passed to list_types; if false only public lists are considered
# Returns: {list@dom => type}
# This is a replacement for ASF::Mail.lists
def self.domain_lists(project, show_all)
  {}.tap do |lists|
    list_types(show_all) do |dom, list, type|
      lists["#{list}@#{dom}"] = type if matches_list?(project, dom, list)
    end
  end
end
# Yield [domain, list] for every 'sub' file found under LIST_CACHE; the
# subscribers themselves are not read.
# The domain and list are taken from the two path components above the 'sub'
# file and are lower-cased, so the names agree with those yielded by
# list_parse and with the module-wide rule that list and domain names are
# always returned as lower-case.
def self.each_list
  Find.find(LIST_CACHE) do |path|
    parts = path.split('/')
    next unless parts[-1] == 'sub'

    yield [parts[-3].downcase, parts[-2].downcase]
  end
end
# Parse the marker files under LIST_BASE: list-modsub, list-modpost, list-remote
# Each line of a marker file (with the line ending removed) is used as a key.
# Return hash: key="domain list", value=hash containing :modsub, :modpost, :remote
# (each set to 1) if the list appears in the corresponding file
# Raises a SystemCallError (e.g. Errno::ENOENT) if a marker file cannot be read.
# BETA: API may change!
def self.parse_markers
  markers = Hash.new { |h, k| h[k] = {} }
  marker_files = { 'list-modsub' => :modsub, 'list-modpost' => :modpost, 'list-remote' => :remote }
  marker_files.each do |name, flag|
    # File.foreach closes the file when done (File.open(...).each left it open)
    File.foreach(File.join(LIST_BASE, name)) do |line|
      markers[line.chomp][flag] = 1
    end
  end
  markers
end
private
# Classify an archiver email for the list {list}@{dom}.
# Returns a two element array: [archiver, kind] where
# - archiver is one of :MBOX, :PONY, :MINO, :MAIL_ARCH, :MARKMAIL, :WHIMSY
# - kind is one of 'public', 'private', 'restricted', 'alias', 'direct'
# minotaur archiver names do not include any public/private indication,
# so they are reported as 'alias' or 'direct' instead.
# Raises RuntimeError if the email is not a recognised archiver.
def self.archiver_type(email, dom, list)
  well_known = {
    ARCH_MBOX_PUB => [:MBOX, 'public'],
    ARCH_MBOX_PRV => [:MBOX, 'private'],
    ARCH_MBOX_RST => [:MBOX, 'restricted'],
    ARCH_PONY_PUB => [:PONY, 'public'],
    ARCH_PONY_PRV => [:PONY, 'private'],
    ARCH_EXT_MAIL_ARCHIVE => [:MAIL_ARCH, 'public'],
  }
  return well_known[email] if well_known.key?(email)

  # normal archiver routed via .qmail-[tlp-]list-archive
  return [:MINO, 'alias'] if "#{list}-archive@#{dom}" == email
  # Direct mail to minotaur
  return [:MINO, 'direct'] if "apmail-#{dom.split('.').first}-#{list}-archive@www.apache.org" == email
  # Unexpected archiver email commits-archive@incubator.apache.org for commits.deprecated@incubator.apache.org
  # INFRA-21658
  return [:MINO, 'alias'] if "#{list.chomp('.deprecated')}-archive@#{dom}" == email

  return [:MARKMAIL, 'public'] if is_markmail_archiver?(email)
  # Whimsy currently only 'archives' private lists
  return [:WHIMSY, 'private'] if is_whimsy_archiver?(email)

  raise "Unexpected archiver email #{email} for #{list}@#{dom}" # Should not happen?
end
# Is the email a minotaur archiver?
# i.e. does it end with <something>-archive@[<label>.]apache.org (or apachecon.com)
# Returns the match position (truthy) or nil
def self.is_mino_archiver?(e)
  mino_archive = /.-archive@([^.]+\.)?(apache\.org|apachecon\.com)$/
  e =~ mino_archive
end
# Is the email a Whimsy archiver?
# i.e. is it addressed to whimsy.apache.org or whimsy-vm<digits>.apache.org
# Returns the match position (truthy) or nil
def self.is_whimsy_archiver?(e)
  whimsy_host = /@whimsy(-vm\d+)?\.apache\.org$/
  e =~ whimsy_host
end
# Is the email a markmail archiver?
# i.e. does it match ARCH_EXT_MARKMAIL_RE
# Returns the match position (truthy) or nil
def self.is_markmail_archiver?(e)
  markmail = ARCH_EXT_MARKMAIL_RE
  e =~ markmail
end
# Is the email any kind of archiver?
# True for the fixed ARCHIVERS addresses; otherwise truthy if it is a
# minotaur, Whimsy or markmail archiver address.
def self.is_archiver?(e)
  return true if ARCHIVERS.include?(e)

  is_mino_archiver?(e) || is_whimsy_archiver?(e) || is_markmail_archiver?(e)
end
# Is the email an archiver that only handles private mail?
# Truthy for:
# - the private mbox-vm and ponymail archivers
# - security-archive@<anything>.apache.org
# - a direct subscription: apmail-<name>-security-archive@www.apache.org
# The host in the direct form is matched literally (dots escaped, anchored at
# the end) so that look-alike hosts are not treated as private archivers.
def self.is_private_archiver?(e)
  [ARCH_MBOX_PRV, ARCH_PONY_PRV].include?(e) ||
    e =~ /^security-archive@.*\.apache\.org$/ ||
    e =~ /^apmail-\w+-security-archive@www\.apache\.org$/ # direct subscription
end
# Returns a new array holding the down-cased version of each entry
def self.downcase(array)
  array.map { |item| item.downcase }
end
# Does the file exist and was it modified less than 5 hours ago?
def self.isRecent(file)
  return false unless File.exist?(file)

  age = Time.now - File.mtime(file) # seconds since last modification
  age < 5 * 60 * 60
end
# Read the entries of a single cache file, selected by domain, list and type.
# Params:
# - type: 'mod' or 'sub' or 'digest' or 'allow' (the file name within the list directory)
# - matchdom: the domain directory (e.g. 'httpd.apache.org')
# - matchlist: the list directory (e.g. 'dev')
# - archivers: whether to include standard ASF archivers (default true)
# The email addresses are returned as an array (one per line). May be empty.
# If the file does not exist, then nil is returned
def self.list_filter(type, matchdom, matchlist, archivers=true)
  path = File.join(LIST_CACHE, matchdom, matchlist, type)
  return nil unless File.exist? path

  entries = File.read(path).split("\n")
  archivers ? entries : entries.reject { |addr| is_archiver?(addr) }
end
# Scans the cache files, calling the block once per file of the requested type.
# The parsed entries are remembered per type (via a WeakRef) and replayed
# without touching the files for as long as the mtime of LIST_TIME is not
# newer than the value recorded when the entries were parsed.
# Param: type = 'mod' or 'sub' or 'dig' ('dig' selects the files named 'digest')
# Raises ArgumentError for any other type
# Yields:
# - domain (e.g. [xxx.].apache.org), lower-cased
# - list (e.g. dev), lower-cased
# - emails as an array (one entry per line of the file)
# Returns nil
def self.list_parse(type, &block)
case type
when 'dig'
type = 'digest'
when 'sub', 'mod'
# pass
else
raise ArgumentError.new('type: expecting dig, mod or sub')
end
# time recorded when this type was last parsed (0 if never)
ctime = @@file_times[type] || 0
mtime = File.mtime(LIST_TIME).to_i
if mtime <= ctime
# data has not been refreshed since the last parse: try to replay the cache
cached = @@file_parsed[type]
if cached
begin
cached.each do |d, l, m|
block.call d, l, m # d and l were frozen when parsed; m is the array of emails
end
return
rescue WeakRef::RefError
# the cached array is no longer available: reset the time and rescan below
@@file_times[type] = 0
end
end
else
# data has been refreshed: drop the stale entries before rescanning
@@file_parsed[type] = nil
end
# only held strongly by this local; after return just the WeakRef below refers to it
cache = [] # see if this preserves mod cache
Find.find(LIST_CACHE) do |path|
parts = path.split('/')
# only paths whose last component is the requested file name are of interest
if parts[-1] == type
dom = parts[-3].downcase.freeze
list = parts[-2].downcase.freeze
begin
# During mail migration links are used - allow for broken ones
mails = File.read(path).split("\n")
cache << [dom, list, mails]
yield [dom, list, mails]
# NOTE(review): the yield is inside this begin, so a StandardError raised by the
# caller's block is also only logged; the entry has already been added to cache
rescue StandardError => e
# NOTE(review): Wunderbar is not required by this file; presumably loaded by the caller - confirm
Wunderbar.warn e
end
end
end
@@file_parsed[type] = WeakRef.new(cache)
@@file_times[type] = mtime
nil # don't return file contents
end
# Standard ASF archivers
ARCH_MBOX_PUB = 'archiver@mbox-vm.apache.org'
ARCH_MBOX_PRV = 'private@mbox-vm.apache.org'
ARCH_MBOX_RST = 'restricted@mbox-vm.apache.org'
ARCH_PONY_PUB = 'archive-asf-public@cust-asf.ponee.io'
ARCH_PONY_PRV = 'archive-asf-private@cust-asf.ponee.io'

# Standard external archivers (necessarily public)
ARCH_EXT_MAIL_ARCHIVE = 'archive@mail-archive.com'
ARCH_EXT_MARKMAIL_RE = /^\w+\.\w+\.\w+@.\.markmail\.org$/ # one.two.three@a.markmail.org

# All the archivers that have a fixed address
ARCHIVERS = [
  ARCH_PONY_PRV,
  ARCH_PONY_PUB,
  ARCH_MBOX_PUB,
  ARCH_MBOX_PRV,
  ARCH_MBOX_RST,
  ARCH_EXT_MAIL_ARCHIVE,
]

# Private archivers
ARCHIVERS_PRV = [
  ARCH_PONY_PRV,
  ARCH_MBOX_PRV,
]
# TODO alias archivers: either add list or use RE to filter them

# Directory holding the extracted mailing list data
LIST_BASE = ASF::Config[:subscriptions] # allow overrides for testing etc

# If this file exists, it is the time when the data was last extracted
# The mods and subs files are only updated if they have changed
LIST_TIME = File.join(LIST_BASE, 'list-start')

# Root of the <domain>/<list>/<type> files
LIST_CACHE = File.join(LIST_BASE, 'cache')
end
end
# Ad-hoc manual checks; only run when this file is executed directly
if __FILE__ == $0
  $LOAD_PATH.unshift '/srv/whimsy/lib'
  require 'whimsy/asf'
  p ASF::MLIST.parse_markers
  # NOTE: the script stops here; the checks below only run if this exit is removed
  exit
  domain = ARGV.shift || 'whimsical'
  mlist = ASF::Committee.find(domain).mail_list
  p mlist
  p ASF::MLIST.domain_lists(mlist, false)
  p ASF::MLIST.domain_lists(mlist, true)
  p ASF::MLIST.list_subscribers(domain)
  p ASF::MLIST.list_subscribers(domain, false, false, true)
  p ASF::MLIST.list_subs(domain)
  p ASF::MLIST.list_subscribers(domain, false, true)
  p ASF::MLIST.list_subscribers(domain, false, true, true)
  p ASF::MLIST.list_subs(domain, false, true)
  # list_moderators takes a single argument (a second one raised ArgumentError)
  p ASF::MLIST.list_moderators(domain)
  p ASF::MLIST.private_subscribers(domain)
  p ASF::MLIST.moderates(['chrisd@apache.org'])
  p ASF::MLIST.digests(['chrisd@apache.org'])
end