#!/usr/bin/env ruby
PAGETITLE = "Members@ Mailing List Statistics" # Wvisible:members
$LOAD_PATH.unshift '/srv/whimsy/lib'

require 'wunderbar'
require 'wunderbar/bootstrap'
require 'wunderbar/jquery'
require 'whimsy/asf'
require 'whimsy/asf/agenda'
require 'date'
require 'mail'
require '../../tools/mboxhdr2csv.rb'
require_relative 'meeting-util'

user = ASF::Person.new($USER)
unless user.asf_member?
  print "Status: 401 Unauthorized\r\n"
  print "WWW-Authenticate: Basic realm=\"ASF Members\"\r\n\r\n"
  exit
end

# Return sorted data in JSON format if the query string includes 'json'
ENV['HTTP_ACCEPT'] = 'application/json' if ENV['QUERY_STRING'].include? 'json'

LIST_ROOT = 'members'
SRV_MAIL = "/srv/mail/#{LIST_ROOT}"

WEEK_TOTAL = '@@total' # Use @@ so it can't match who name/emails
WEEK_START = '@@start'
COHORT_STYLES = { # TODO find better ways to colorize
  'Zero to two years' => 'text-warning',
  'Two to five years' => 'text-success',
  'Five to ten years' => 'text-info',
  'Ten or more years' => 'text-primary',
  'Non-members' => 'text-muted'
}

# Define simple styles for various 'ages' of Members
# 1-2 years, 3-5 years, 5-10 years, 10+ years 
def style_cohorts(cohorts)
  today = Date.today.year
  cohorts['cohorts'].each do |id, date|
    case date[0,4].to_i
    when (today-1)..today
      cohorts['cohorts'][id] = COHORT_STYLES['Zero to two years']
    when (today-5)...(today-1)
      cohorts['cohorts'][id] = COHORT_STYLES['Two to five years']
    when (today-10)...(today-5)
      cohorts['cohorts'][id] = COHORT_STYLES['Five to ten years']
    else
      cohorts['cohorts'][id] = COHORT_STYLES['Ten or more years']
    end
  end
end

# Display monthly statistics for all available data
def display_monthly(months:, nondiscuss:, cohorts:)
  months.sort.reverse.each do |month|
    data = MailUtils.get_mails_month(mailroot: SRV_MAIL, yearmonth: month, nondiscuss: nondiscuss)
    next if data.empty?
    _h1 "#{LIST_ROOT}@ statistics for #{month} (total mails: #{data[MailUtils::MAILS].length})", id: "#{month}"
    _div.row do
      _div.col_sm_6 do
        _ul.list_group do
          _li.list_group_item.active.list_group_item_info "Top Ten Email Senders"
          ctr = 0
          data[MailUtils::MAILCOUNT].each do |id, num|
            if num > (data[MailUtils::MAILS].length / 10)
              _li.list_group_item.list_group_item_warning "#{id} wrote: #{num}"
            else
              _li.list_group_item "#{id} wrote: #{num}"
            end
            ctr += 1
            break if ctr >= 10
          end
        end   
      end
      _div.col_sm_6 do
        _ul.list_group do
          _li.list_group_item.list_group_item_info "Long Tail - All Senders"
          _li.list_group_item do
            data[MailUtils::MAILCOUNT].each do |name, num|
              id = (name.match(/.+[(](\w+)/) || [])[1]
              if cohorts['cohorts'].has_key?(id)
                _span! "#{name} (#{num}), ", class: "#{cohorts['cohorts'][id]}"
              else
                _span! "#{name} (#{num}), ", class: "#{cohorts['cohorts'][COHORT_STYLES['Non-member']]}"
              end
            end
          end
        end
      end
    end
  end
end

# Display weekly statistics for non-tool emails
def display_weekly(months:, nondiscuss:)
  weeks = Hash.new {|h, k| h[k] = {}}
  months.sort.each do |month|
    data = MailUtils.get_mails_month(mailroot: SRV_MAIL, yearmonth: month, nondiscuss: nondiscuss)
    next if data.empty?
    # accumulate all mails in order for weeks, through all months
    data[MailUtils::MAILS].each do |m|
      d = Date.parse(m['date'])
      wn = d.strftime('%G-W%V')
      if weeks.has_key?(wn)
        weeks[wn][m['who']] +=1
      else
        weeks[wn] = Hash.new{ 0 }
        weeks[wn][m['who']] = 1
      end
    end
  end
  _h1 "#{LIST_ROOT}@ list emails weekly statistics", id: "top"
  _div.row do
    _div.col.col_sm_offset_1.col_sm_9 do
      weeks.sort.reverse.each do |week, senders|
        total = 0
        senders.each do |sender, count|
          next if /@@/ =~ sender
          total += count
        end
        senders[WEEK_TOTAL] = total
        _ul.list_group do
          _li.list_group_item.active.list_group_item_info "Week #{week} Top Senders (total mails: #{senders[WEEK_TOTAL]})", id: "#{week}"
          ctr = 0
          senders.sort_by {|k,v| -v}.to_h.each do |id, num|
            next if /@@/ =~ id
            if (num > 7) && (num > (senders[WEEK_TOTAL] / 5)) # Ignore less than one per day 
              _li.list_group_item.list_group_item_danger "#{id} wrote: #{num}"
            elsif (num > 7) && (num > (senders[WEEK_TOTAL] / 10))
              _li.list_group_item.list_group_item_warning "#{id} wrote: #{num}"
            elsif (num > 7) && (num > (senders[WEEK_TOTAL] / 20))
              _li.list_group_item.list_group_item_info "#{id} wrote: #{num}"
            else
              _li.list_group_item "#{id} wrote: #{num}"
            end
            ctr += 1
            break if ctr >= 5
          end
        end
      end
    end
  end
end

# produce HTML
_html do
  _body? do
    _whimsy_body(
      title: PAGETITLE,
      related: {
        "/members/index" => "More Member-Specific Tools",
        "/officers/list-traffic" => "Board@ List Traffic",
        "#{ENV['SCRIPT_NAME']}" => "Members@ List Traffic By Month",
        "#{ENV['SCRIPT_NAME']}?week" => "Members@ List Traffic By Week",
        "https://github.com/apache/whimsy/blob/master/www#{ENV['SCRIPT_NAME']}" => "See This Source Code"
      },
      helpblock: -> {
        _p %{
          This script displays simple (and likely slightly lossy) analysis of traffic on the #{LIST_ROOT}@ mailing list.
          In particular, mapping From: email to a committer may not work (meaning individual senders may have multiple spots
          or be miscategorized).  Work in progress.  Server only stores last year of mail.
        }
        _p do
          _ 'Senders of more than 10% of all emails in a month are highlighted. '
          _ 'Senders of more than 20%, 10%, or 5% of all emails in a week are highlighted in the '
          _a 'By week view (supply ?week in URL).', href: '?week'
        end
        _p do
          _ 'For the All Senders column, Members are colorized by approximate years of membership like so: '
          _br
          COHORT_STYLES.each do |name, style|
            _span "#{name}, ", class: "#{style}"
          end
          _ ' note that due to email address variations, some entries may be incorrectly marked.'
        end
      }
    ) do
      months = Dir["#{SRV_MAIL}/*"].map {|path| File.basename(path).untaint}.grep(/^\d+$/)
      attendance = MeetingUtil.get_attendance(ASF::SVN['Meetings'])
      style_cohorts(attendance) if attendance.has_key?('cohorts') # Allow to fail silently if data missing
      # if ENV['QUERY_STRING'].include? 'Clear-Cache-No-Really'
      #   _p do # Danger, Will Robinson!
      #     _ 'Note: deleting cached .json files: '
      #     cache = Dir["#{SRV_MAIL}/??????.json"]
      #     ctr = 0
      #     cache.each do |f|
      #       File.delete(f.untaint)
      #       ctr += 1
      #     end
      #     _ "Successfully deleted #{ctr} files (will be rebuilt now)."
      #   end
      # end
      if ENV['QUERY_STRING'].include? 'week'
        display_weekly(months: months, nondiscuss: MailUtils::NONDISCUSSION_SUBJECTS["<#{LIST_ROOT}.apache.org>"])
      else
        display_monthly(months: months, nondiscuss: MailUtils::NONDISCUSSION_SUBJECTS["<#{LIST_ROOT}.apache.org>"], cohorts: attendance)
      end
    end
  end
end

# Return just sorted data counts as JSON
_json do
  months = Dir["#{SRV_MAIL}/*"].map {|path| File.basename(path).untaint}.grep(/^\d+$/)
  data = Hash.new {|h, k| h[k] = {} }
  months.sort.reverse.each do |month|
    tmp = MailUtils.get_mails_month(mailroot: SRV_MAIL, yearmonth: month, nondiscuss: MailUtils::NONDISCUSSION_SUBJECTS["<#{LIST_ROOT}.apache.org>"])
    next if tmp.empty?
    data[month][MailUtils::TOOLCOUNT] = tmp[MailUtils::TOOLCOUNT]
    data[month][MailUtils::MAILCOUNT] = tmp[MailUtils::MAILCOUNT]
  end
  data
end
