blob: 46d2c729b88815b39b2d44c55135bc2518df113a [file] [log] [blame]
#!/usr/bin/env ruby
PAGETITLE = "Members@ Mailing List Statistics" # Wvisible:members
$LOAD_PATH.unshift '/srv/whimsy/lib'
require 'wunderbar'
require 'wunderbar/bootstrap'
require 'wunderbar/jquery'
require 'whimsy/asf'
require 'whimsy/asf/agenda'
require 'date'
require 'mail'
require '../../tools/mboxhdr2csv.rb'
require_relative 'meeting-util'
# Only ASF Members may see these statistics: anyone else is sent a
# 401 response with a Basic auth challenge, and the script stops here.
user = ASF::Person.new($USER)
unless user.asf_member?
  print "Status: 401 Unauthorized\r\n" \
        "WWW-Authenticate: Basic realm=\"ASF Members\"\r\n\r\n"
  exit
end
# Return sorted data in JSON format if the query string includes 'json'.
# QUERY_STRING can be unset (for example when the script is not run as a CGI),
# so nil is treated as an empty query string instead of raising NoMethodError.
ENV['HTTP_ACCEPT'] = 'application/json' if ENV['QUERY_STRING'].to_s.include? 'json'

LIST_ROOT = 'members' # name of the mailing list being analyzed
SRV_MAIL = "/srv/mail/#{LIST_ROOT}" # directory holding the per-month mail data
WEEK_TOTAL = '@@total' # Use @@ so it can't match who name/emails
WEEK_START = '@@start'

# Maps each membership cohort label to the CSS class used to colorize it.
COHORT_STYLES = { # TODO find better ways to colorize
  'Zero to two years' => 'text-warning',
  'Two to five years' => 'text-success',
  'Five to ten years' => 'text-info',
  'Ten or more years' => 'text-primary',
  'Non-members' => 'text-muted'
}.freeze
# Replace every entry of cohorts['cohorts'] (id => date string whose first
# four characters are a year) with the CSS class for that Member's 'age',
# measured in calendar years between that year and the current year:
#   0-1  years => 'Zero to two years'
#   2-5  years => 'Two to five years'
#   6-10 years => 'Five to ten years'
#   anything else => 'Ten or more years'
# The hash is updated in place and is also the return value.
def style_cohorts(cohorts)
  this_year = Date.today.year
  members = cohorts['cohorts']
  members.each_pair do |id, joined|
    age = this_year - joined[0, 4].to_i
    label =
      if age.between?(0, 1)
        'Zero to two years'
      elsif age.between?(2, 5)
        'Two to five years'
      elsif age.between?(6, 10)
        'Five to ten years'
      else
        'Ten or more years'
      end
    members[id] = COHORT_STYLES[label]
  end
end
# Display monthly statistics for all available data, newest month first.
#
# months:     year-month names accepted by MailUtils.get_mails_month
# nondiscuss: passed through unchanged to MailUtils.get_mails_month
# cohorts:    hash whose 'cohorts' entry maps a sender id to the CSS class
#             used to colorize that sender in the "All Senders" column
def display_monthly(months:, nondiscuss:, cohorts:)
  months.sort.reverse.each do |month|
    data = MailUtils.get_mails_month(mailroot: SRV_MAIL, yearmonth: month, nondiscuss: nondiscuss)
    next if data.empty?
    total = data[MailUtils::MAILS].length
    _h1 "#{LIST_ROOT}@ statistics for #{month} (total mails: #{total})", id: "#{month}"
    _div.row do
      _div.col_sm_6 do
        _ul.list_group do
          _li.list_group_item.active.list_group_item_info "Top Ten Email Senders"
          # The first ten entries, in whatever order MAILCOUNT supplies them
          data[MailUtils::MAILCOUNT].first(10).each do |id, num|
            if num > (total / 10) # wrote more than a tenth of the month's mails
              _li.list_group_item.list_group_item_warning "#{id} wrote: #{num}"
            else
              _li.list_group_item "#{id} wrote: #{num}"
            end
          end
        end
      end
      _div.col_sm_6 do
        _ul.list_group do
          _li.list_group_item.list_group_item_info "Long Tail - All Senders"
          _li.list_group_item do
            data[MailUtils::MAILCOUNT].each do |name, num|
              # The id is the run of word characters after an opening parenthesis
              id = (name.match(/.+[(](\w+)/) || [])[1]
              # Senders without a cohort entry get the 'Non-members' style
              style = cohorts['cohorts'].fetch(id) { COHORT_STYLES['Non-members'] }
              _span! "#{name} (#{num}), ", class: "#{style}"
            end
          end
        end
      end
    end
  end
end
# Display weekly statistics for non-tool emails.
#
# Mails from every month are tallied per ISO week ('%G-W%V') and per sender,
# then each week is listed newest first with its five most active senders.
#
# months:     year-month names accepted by MailUtils.get_mails_month
# nondiscuss: passed through unchanged to MailUtils.get_mails_month
def display_weekly(months:, nondiscuss:)
  weeks = {}
  months.sort.each do |month|
    data = MailUtils.get_mails_month(mailroot: SRV_MAIL, yearmonth: month, nondiscuss: nondiscuss)
    next if data.empty?
    # accumulate all mails in order for weeks, through all months
    data[MailUtils::MAILS].each do |mail|
      week = Date.parse(mail['date']).strftime('%G-W%V')
      tally = (weeks[week] ||= Hash.new(0))
      tally[mail['who']] += 1
    end
  end

  _h1 "#{LIST_ROOT}@ list emails weekly statistics", id: "top"
  _div.row do
    _div.col.col_sm_offset_1.col_sm_9 do
      weeks.sort.reverse_each do |week, senders|
        # Sum the real senders only; '@@' keys are reserved for bookkeeping
        total = senders.reject { |sender, _count| /@@/ =~ sender }.values.inject(0, :+)
        senders[WEEK_TOTAL] = total
        _ul.list_group do
          _li.list_group_item.active.list_group_item_info "Week #{week} Top Senders (total mails: #{total})", id: "#{week}"
          busiest = senders.sort_by { |_who, count| -count }.reject { |who, _count| /@@/ =~ who }.first(5)
          busiest.each do |id, num|
            highlight =
              if num <= 7 # Ignore less than one per day
                nil
              elsif num > (total / 5)
                :danger
              elsif num > (total / 10)
                :warning
              elsif num > (total / 20)
                :info
              end
            line = "#{id} wrote: #{num}"
            case highlight
            when :danger then _li.list_group_item.list_group_item_danger line
            when :warning then _li.list_group_item.list_group_item_warning line
            when :info then _li.list_group_item.list_group_item_info line
            else _li.list_group_item line
            end
          end
        end
      end
    end
  end
end
# produce HTML: the monthly view by default, or the weekly view when the
# query string contains 'week'
_html do
  _body? do
    _whimsy_body(
      title: PAGETITLE,
      related: {
        "/members/index" => "More Member-Specific Tools",
        "/officers/list-traffic" => "Board@ List Traffic",
        "#{ENV['SCRIPT_NAME']}" => "Members@ List Traffic By Month",
        "#{ENV['SCRIPT_NAME']}?week" => "Members@ List Traffic By Week",
        "https://github.com/apache/whimsy/blob/master/www#{ENV['SCRIPT_NAME']}" => "See This Source Code"
      },
      helpblock: -> {
        _p %{
This script displays simple (and likely slightly lossy) analysis of traffic on the #{LIST_ROOT}@ mailing list.
In particular, mapping From: email to a committer may not work (meaning individual senders may have multiple spots
or be miscategorized). Work in progress. Server only stores last year of mail.
}
        _p do
          _ 'Senders of more than 10% of all emails in a month are highlighted. '
          _ 'Senders of more than 20%, 10%, or 5% of all emails in a week are highlighted in the '
          _a 'By week view (supply ?week in URL).', href: '?week'
        end
        _p do
          _ 'For the All Senders column, Members are colorized by approximate years of membership like so: '
          _br
          COHORT_STYLES.each do |name, style|
            _span "#{name}, ", class: "#{style}"
          end
          _ ' note that due to email address variations, some entries may be incorrectly marked.'
        end
      }
    ) do
      # QUERY_STRING can be unset; treat that as an empty query rather than
      # raising NoMethodError on nil
      query = ENV['QUERY_STRING'].to_s
      nondiscuss = MailUtils::NONDISCUSSION_SUBJECTS["<#{LIST_ROOT}.apache.org>"]
      # Only all-digit entries under SRV_MAIL are treated as months
      months = Dir["#{SRV_MAIL}/*"].map {|path| File.basename(path).untaint}.grep(/^\d+$/)
      attendance = MeetingUtil.get_attendance(ASF::SVN['Meetings'])
      style_cohorts(attendance) # replaces the dates in attendance['cohorts'] with CSS classes
      if query.include? 'Clear-Cache-No-Really'
        _p do # Danger, Will Robinson!
          _ 'Note: deleting cached .json files: '
          cache = Dir["#{SRV_MAIL}/??????.json"]
          cache.each { |f| File.delete(f.untaint) }
          _ "Successfully deleted #{cache.length} files (will be rebuilt now)."
        end
      end
      if query.include? 'week'
        display_weekly(months: months, nondiscuss: nondiscuss)
      else
        display_monthly(months: months, nondiscuss: nondiscuss, cohorts: attendance)
      end
    end
  end
end
# Return just sorted data counts as JSON: for each month that has data
# (newest first), the TOOLCOUNT and MAILCOUNT values reported by MailUtils.
_json do
  month_names = Dir["#{SRV_MAIL}/*"].map {|path| File.basename(path).untaint}.grep(/^\d+$/)
  nondiscuss = MailUtils::NONDISCUSSION_SUBJECTS["<#{LIST_ROOT}.apache.org>"]
  month_names.sort.reverse.each_with_object({}) do |month, counts|
    mails = MailUtils.get_mails_month(mailroot: SRV_MAIL, yearmonth: month, nondiscuss: nondiscuss)
    next if mails.empty?
    counts[month] = {
      MailUtils::TOOLCOUNT => mails[MailUtils::TOOLCOUNT],
      MailUtils::MAILCOUNT => mails[MailUtils::MAILCOUNT]
    }
  end
end