blob: 60519a48ed5eb43504f1e87bab92e3220797b5ee [file] [log] [blame]
#!/usr/bin/env ruby
# PonyAPI: utilities for downloading Ponymail APIs (stats.lua or mbox.lua)
# See also: https://ponymail.incubator.apache.org/docs/api
require 'json'
require 'csv'
require 'net/http'
require 'cgi'
# Utilities for downloading from Ponymail APIs
module PonyAPI
  # Base URL of the Ponymail server; override with the PONYHOST environment variable
  PONYHOST = ENV['PONYHOST'] || 'https://lists.apache.org/'
  # Stats API URL template,
  # e.g. list=board&domain=apache.org&d=2017-04 is saved as board-apache-org-201704-stats.json
  PONYSTATS = PONYHOST + 'api/stats.lua?list=%{list}&domain=%{domain}&d=%{year}-%{month}'
  STATSMBOX = "%{list}-%{domNoDot}-%<year>.04d%<month>.02d-stats.json" # used to generate output file name
  # Mbox API URL template,
  # e.g. list=board@apache.org&date=2016-06 is saved as board-apache-org-201606.mbox
  PONYMBOX = PONYHOST + 'api/mbox.lua?list=%{list}@%{domain}&date=%{year}-%{month}'
  FILEMBOX = "%{list}-%{domNoDot}-%<year>.04d%<month>.02d.mbox" # used to generate output file name
  PONYPREFS = PONYHOST + 'api/preferences.lua' # => preferences.json

  extend self

  # Get summary of all mailing lists by domain
  # Must supply cookie = 'ponymail-logged-in-cookie' to show private lists
  # if cookie == 'prompt' and the script is run interactively then
  # you will be prompted to enter the cookie value
  # The method writes the file 'lists.json' if dir != nil
  # (sorted by key when sort_list is true; sorting is skipped when nothing is written)
  # it returns the data as a hash, or nil if the preferences carry no 'lists' entry
  def get_pony_lists(dir, cookie=nil, sort_list=false)
    lists = get_pony_prefs(nil, cookie)['lists']
    if lists && dir
      # no real point sorting unless writing the file
      lists = lists.sort.to_h if sort_list
      write_pretty_json(File.join(dir.to_s, 'lists.json'), lists, 'get_pony_lists()')
    end
    lists
  end

  # Get preferences including a summary of all mailing lists by domain
  # Must supply cookie = 'ponymail-logged-in-cookie' to show private lists
  # if cookie == 'prompt' and the script is run interactively then
  # you will be prompted to enter the cookie value
  # The method writes the file 'preferences.json' if dir != nil
  # it returns the data as a hash (empty if the server did not answer 200)
  # A warning is printed when a cookie was supplied but the reply has no login credentials
  def get_pony_prefs(dir, cookie=nil, sort_list=false)
    cookie = get_cookie if cookie == 'prompt'
    uri, _request, response = fetch_pony(PONYPREFS, cookie)
    jzon = {}
    if response.code == '200'
      jzon = JSON.parse(response.body)
      if dir
        # no real point sorting unless writing the file
        jzon['lists'] = jzon['lists'].sort.to_h if sort_list && jzon['lists']
        write_pretty_json(File.join(dir.to_s, 'preferences.json'), jzon,
                          "get_pony_prefs(#{uri.request_uri})")
      end
    else
      puts "ERROR:get_pony_prefs(#{uri.request_uri}) returned code #{response.code.inspect}"
    end
    if cookie && !(jzon['login'] && jzon['login']['credentials'])
      puts "WARN:get_pony_prefs(#{uri.request_uri}) failed cookie test"
    end
    jzon
  end

  # Download one month of stats as a JSON
  # Must supply cookie = 'ponymail-logged-in-cookie' if a private list
  # The file (named per STATSMBOX) is written into dir only when the server answers 200;
  # otherwise an ERROR line is printed and nothing is written
  def get_pony_stats(dir, list, subdomain, year, month, cookie)
    args = make_args(list, subdomain, year, month)
    uri, _request, response = fetch_pony(PONYSTATS % args, cookie)
    unless response.code == '200'
      puts "ERROR:get_pony_stats(#{uri.request_uri}) returned code #{response.code.inspect}"
      return
    end
    File.open(File.join(dir, STATSMBOX % args), "w") do |f|
      begin
        f.puts JSON.pretty_generate(JSON.parse(response.body))
      rescue JSON::JSONError
        jzon = nil
        begin
          # If JSON threw error, try again forcing to UTF-8 (may lose data)
          jzon = JSON.parse(response.body.encode('UTF-8', :invalid => :replace, :undef => :replace))
          f.puts JSON.fast_generate(jzon, {:max_nesting => false, :indent => ' '})
        rescue JSON::JSONError => e
          puts "WARN:get_pony_stats(#{uri.request_uri}) #{e.message} #{e.backtrace[0]}, continuing without pretty"
          # jzon is still nil when even the re-parse failed; keep the raw body
          # in that case instead of writing an empty file
          f.puts(jzon.nil? ? response.body : jzon)
        end
      end
    end
  end

  # Get multiple years/months of public stats as json
  def get_pony_stats_many(dir, list, subdomain, years, months, cookie)
    years.each do |y|
      months.each do |m|
        get_pony_stats dir, list, subdomain, y, m, cookie
      end
    end
  end

  # Download one month as mbox
  # Caveats: uses response's encoding; overwrites existing .mbox file
  # Must supply cookie = 'ponymail-logged-in-cookie' if a private list
  def get_pony_mbox(dir, list, subdomain, year, month, cookie)
    args = make_args(list, subdomain, year, month)
    uri, _request, response = fetch_pony(PONYMBOX % args, cookie)
    if response.code == '200'
      File.open(File.join(dir, FILEMBOX % args), "w:#{response.body.encoding}") do |f|
        f.puts response.body
      end
    else
      puts "ERROR:get_pony_mbox(#{uri}) returned code #{response.code.inspect}"
    end
  end

  # Get multiple years/months of mboxes
  def get_pony_mbox_many(dir, list, subdomain, years, months, cookie)
    years.each do |y|
      months.each do |m|
        get_pony_mbox dir, list, subdomain, y, m, cookie
      end
      sleep(1) # Be nice to the server; mboxes take effort
    end
  end

  private

  # Write data to path as pretty-printed JSON.
  # If the generator rejects the data, print a warning tagged with context
  # and write the object's plain string form instead.
  def write_pretty_json(path, data, context)
    File.open(path, "w") do |f|
      begin
        f.puts JSON.pretty_generate(data)
      rescue JSON::GeneratorError => e
        puts "WARN:#{context} #{e.message} #{e.backtrace[0]}, continuing without pretty"
        f.puts data
      end
    end
  end

  # create an argument list suitable for string formatting
  # (a nil or empty subdomain selects the top-level apache.org domain)
  def make_args(list, subdomain, year, month)
    if subdomain.nil? || subdomain == ''
      domain = 'apache.org'
    else
      domain = subdomain + '.apache.org'
    end
    {list: list, subdomain: subdomain, domain: domain, domNoDot: domain.tr('.', '-'), year: year, month: month}
  end

  # Prompt on the terminal for the login cookie without echoing it.
  # Returns nil (after a warning) when stdin is not a tty.
  def get_cookie
    unless $stdin.isatty
      puts "WARN:Input is not a tty; cannot prompt for a cookie"
      return nil
    end
    require 'io/console' # loaded lazily: only needed for interactive prompting
    $stdin.getpass('Please provide the login cookie: ')
  end

  # Build the GET request for uri, attaching the ponymail cookie only when
  # one was really supplied (nil and '' both mean "anonymous").
  def pony_request(uri, cookie)
    request = Net::HTTP::Get.new(uri.request_uri)
    request['Cookie'] = "ponymail=#{cookie}" unless cookie.nil? || cookie.to_s.empty?
    request
  end

  # Fetch a Ponymail API, with optional logged-in cookie
  # Returns [uri, request, response] for the final (non-redirect) reply.
  # Follows at most limit redirects, then raises ArgumentError.
  # NOTE(review): the cookie is re-sent to whatever location a redirect names - confirm this is intended
  def fetch_pony(uri, cookie, limit = 10)
    raise ArgumentError, 'too many HTTP redirects' if limit <= 0
    uri = URI.parse(uri)
    Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |https|
      request = pony_request(uri, cookie)
      response = https.request(request)
      location = response['location']
      if response.code =~ /^3\d\d/ && location
        # merge resolves a relative Location against the current URI
        fetch_pony(uri.merge(location).to_s, cookie, limit - 1)
      else
        [uri, request, response]
      end
    end
  end
end
if __FILE__ == $0
  # Command-line driver: [-v] method_name [args...]
  # An optional leading -v prints the inspected return value of the call.
  method = ARGV.shift
  verbose = (method == '-v')
  method = ARGV.shift if verbose

  if method
    target = PonyAPI.method(method)
    # the words nil, true and false on the command line stand for the Ruby literals;
    # every other argument is passed through as a string
    literals = { 'nil' => nil, 'true' => true, 'false' => false }
    args = ARGV.map { |word| literals.fetch(word, word) }
    $stderr.puts "Calling #{method}() using #{args.inspect}"
    result = target.call(*args)
    puts result.inspect if verbose
  end
  # Example calls:
  # PonyAPI.get_pony_mbox('.', 'dev', 'whimsical', 2017, 01, nil)
  # PonyAPI.get_pony_stats('.', 'dev', 'whimsical', 2017, 01, nil)
  # puts PonyAPI.get_pony_prefs(nil, nil, true)['login'].inspect
  # puts PonyAPI.get_pony_lists(nil,nil,true).keys.length
end